# Base

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AudioNet(nn.Module):
    def __init__(self, params_dict):
        super().__init__()

        self.in_ch = params_dict['in_ch']
        self.num_filters1 = params_dict['num_filters1']
        self.num_filters2 = params_dict['num_filters2']
        self.num_hidden = params_dict['num_hidden']
        self.out_size = params_dict['out_size']

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.in_ch, self.num_filters1, 10, 1),
            nn.BatchNorm1d(self.num_filters1),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(self.num_filters1, self.num_filters2, 10, 1),
            nn.BatchNorm1d(self.num_filters2),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.pool = nn.AvgPool1d(10, 10)

        self._to_linear = self._get_to_linear()

        self.fc1 = nn.Linear(self._to_linear, self.num_hidden)
        self.fc2 = nn.Linear(self.num_hidden, self.out_size)
        self.drop = nn.Dropout(0.5)
        self.act = nn.ReLU()

    def _get_to_linear(self):

        x = torch.randn(64, self.in_ch, 4501)  
        with torch.no_grad():
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.pool(x)
        return x.numel() // x.shape[0]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool(x)
        

        x = x.view(-1, self._to_linear) 

        x = self.fc1(x)
        x = self.drop(x)
        x = self.act(x)
        x = self.fc2(x)

        return x

# TCN

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()

class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super().__init__()
        self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size,
                               stride=stride, padding=padding, dilation=dilation)
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size,
                               stride=stride, padding=padding, dilation=dilation)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)

        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TCN(nn.Module):
    def __init__(self, input_channel, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = input_channel if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers.append(TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                        padding=(kernel_size-1) * dilation_size, dropout=dropout))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

class AudioNet(nn.Module):
    def __init__(self, params_dict):
        super().__init__()

        self.in_ch = params_dict['in_ch']
        self.num_filters1 = params_dict['num_filters1']
        self.num_filters2 = params_dict['num_filters2']
        self.num_hidden = params_dict['num_hidden']
        self.out_size = params_dict['out_size']

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.in_ch, self.num_filters1, 10, 1),
            nn.BatchNorm1d(self.num_filters1),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(self.num_filters1, self.num_filters2, 10, 1),
            nn.BatchNorm1d(self.num_filters2),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.pool = nn.AvgPool1d(10, 10)

        self.tcn = TCN(self.num_filters2, [self.num_filters2 * 2, self.num_filters2 * 4], kernel_size=8, dropout=0.25)

        self._to_linear = self._get_to_linear()

        self.fc1 = nn.Linear(self._to_linear, self.num_hidden)
        self.fc2 = nn.Linear(self.num_hidden, self.out_size)
        self.drop = nn.Dropout(0.5)
        self.act = nn.ReLU()

    def _get_to_linear(self):
        x = torch.randn(64, self.in_ch, 4501)
        with torch.no_grad():
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.pool(x)
            x = self.tcn(x) 
        return x.numel() // x.shape[0]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.tcn(x)
        x = x.view(-1, self._to_linear)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.act(x)
        x = self.fc2(x)
        return x

# MSCNN

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiScaleConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernels=[3, 5, 7], stride=1, padding=1):
        super(MultiScaleConv, self).__init__()
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels, out_channels, kernel_size=k, stride=stride, padding=k//2)
            for k in kernels
        ])
        self.batch_norm = nn.BatchNorm1d(out_channels * len(kernels))
        self.relu = nn.ReLU()

    def forward(self, x):
        outputs = [conv(x) for conv in self.convs]
        x = torch.cat(outputs, dim=1)
        x = self.batch_norm(x)
        x = self.relu(x)
        return x

class AudioNet(nn.Module):
    def __init__(self, params_dict):
        super(AudioNet, self).__init__()
        self.in_ch = params_dict['in_ch']
        self.num_filters1 = params_dict['num_filters1']
        self.num_filters2 = params_dict['num_filters2']
        self.num_hidden = params_dict['num_hidden']
        self.out_size = params_dict['out_size']

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.in_ch, self.num_filters1, 10, 1),
            nn.BatchNorm1d(self.num_filters1),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(self.num_filters1, self.num_filters2, 10, 1),
            nn.BatchNorm1d(self.num_filters2),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.multi_scale = MultiScaleConv(self.num_filters2, self.num_filters2)

        self.pool = nn.AvgPool1d(10, 10)
        self.fc1 = nn.Linear(self._get_to_linear(), self.num_hidden)
        self.fc2 = nn.Linear(self.num_hidden, self.out_size)
        self.drop = nn.Dropout(0.5)
        self.act = nn.ReLU()

    def _get_to_linear(self):
        x = torch.randn(64, self.num_filters2, 501)  
        x = self.multi_scale(x)
        x = self.pool(x)
        return x.numel() // x.shape[0]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.multi_scale(x)
        x = self.pool(x)
        x = x.view(-1, self._get_to_linear())
        x = self.fc1(x)
        x = self.drop(x)
        x = self.act(x)
        x = self.fc2(x)
        return x

# BI LSTM and Soft Attention

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    def __init__(self, feature_dim):
        super(Attention, self).__init__()
        self.feature_dim = feature_dim
        self.attention = nn.Sequential(
            nn.Linear(feature_dim, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        scores = self.attention(x)
        alpha = F.softmax(scores, dim=1)
        attended_features = x * alpha
        return attended_features.view(-1, self.feature_dim)

class AudioNet(nn.Module):
    def __init__(self, params_dict):
        super().__init__()
        self.in_ch = params_dict['in_ch']
        self.num_filters1 = params_dict['num_filters1']
        self.num_filters2 = params_dict['num_filters2']
        self.num_hidden = params_dict['num_hidden']
        self.out_size = params_dict['out_size']

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.in_ch, self.num_filters1, 10, 1),
            nn.BatchNorm1d(self.num_filters1),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(self.num_filters1, self.num_filters2, 10, 1),
            nn.BatchNorm1d(self.num_filters2),
            nn.ReLU(),
            nn.AvgPool1d(2, 2)
        )
        self.lstm = nn.LSTM(self.num_filters2, self.num_hidden, batch_first=True, bidirectional=True)
        self.attention = Attention(self.num_hidden * 2, self.num_hidden * 2)

        self.fc = nn.Linear(self.num_hidden * 2, self.out_size)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(0, 2, 1)
        x, (hidden, cell) = self.lstm(x)
        x = self.attention(x)
        x = self.fc(x)
        return x

# Attention Scores

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    def __init__(self, feature_dim):
        super(Attention, self).__init__()
        self.feature_dim = feature_dim
        self.attention = nn.Sequential(
            nn.Linear(feature_dim, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        scores = self.attention(x)
        alpha = F.softmax(scores, dim=1)
        attended_features = x * alpha
        return attended_features.view(-1, self.feature_dim)

class AudioNet(nn.Module):
    def __init__(self, params_dict):
        super(AudioNet, self).__init__()
        self.in_ch = params_dict.get('in_ch', 1)
        self.num_filters1 = params_dict.get('num_filters1', 32)
        self.num_filters2 = params_dict.get('num_filters2', 64)
        self.num_hidden = params_dict.get('num_hidden', 128)
        self.out_size = params_dict.get('out_size', 1)

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.in_ch, self.num_filters1, kernel_size=10, stride=1),
            nn.BatchNorm1d(self.num_filters1),
            nn.ReLU(inplace=True),
            nn.AvgPool1d(kernel_size=2, stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(self.num_filters1, self.num_filters2, kernel_size=10, stride=1),
            nn.BatchNorm1d(self.num_filters2),
            nn.ReLU(inplace=True),
            nn.AvgPool1d(kernel_size=2, stride=2)
        )
        self.pool = nn.AvgPool1d(kernel_size=10, stride=10)

        self._to_linear = None
        self.attention = Attention(self._get_to_linear())

        self.fc1 = nn.Linear(self._get_to_linear(), self.num_hidden)
        self.fc2 = nn.Linear(self.num_hidden, self.out_size)
        self.drop = nn.Dropout(p=0.5)
        self.act = nn.ReLU(inplace=True)

    def _get_to_linear(self):
        if self._to_linear is None: 
            x = torch.randn(1, self.in_ch, 4501)
            with torch.no_grad():
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.pool(x)
                self._to_linear = x.numel() // x.shape[0]
        return self._to_linear

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = x.view(-1, self._get_to_linear())
        x = self.attention(x)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.act(x)
        x = self.fc2(x)
        return x.to(x.device)