## Config

In [11]:
# Hyperparameters shared by the cells below.

# Size of the model's output layer (passed to my_CRNN as `classes_num`).
no_classes = 25

# Not referenced in the visible cells; presumably the number of frames per
# training example -- TODO confirm against the data-loading code.
sequence_length = 500
# Backward-compatible alias for the original (misspelled) name.
sequence_lenghth = sequence_length

# Feature-extraction settings; not referenced in the visible cells.
# Presumably sample rate in Hz, FFT window size, and number of mel bands.
# NOTE(review): my_CRNN sizes its GRU input as 3 * cnn_filters, which after the
# (1, 8), (1, 4), (1, 2) max-pools implies 192-255 bins on the input's last
# axis, not 64 -- confirm the actual feature shape fed to the model.
sampling_freq = 44100
n_fft = 2048
mel_bands = 64

batch_size = 64

# Number of output channels of every conv layer (passed as `cnn_filters`).
cnn_filter = 3
# Hidden size of the GRU (passed as `rnn_hid`); despite the name this is a
# unit count, not a number of layers.
gru_hidden_layers = 64
# Dropout probability applied after each conv block (passed as `_dropout_rate`).
dropout = 0.3

## Initialization

In [5]:
# Imported here so this cell also runs on a fresh kernel: the only other
# visible `import torch` lives in a later cell.
import torch

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


## Create model

In [7]:
import torch
import torch.nn as nn

def clip_mse(output, target):
    """Return the mean squared error between ``output`` and ``target``.

    Args:
        output: Tensor of predictions.
        target: Tensor of reference values to compare against ``output``.

    Returns:
        A scalar tensor: the squared differences averaged over all elements
        (``reduction='mean'``).
    """
    return torch.nn.functional.mse_loss(output, target, reduction='mean')


class my_CRNN(nn.Module):
    """Convolutional-recurrent network: 3 conv blocks -> bidirectional GRU -> 2 linear layers.

    Each conv block is Conv2d(3x3, 'same') -> BatchNorm2d -> ReLU -> MaxPool2d
    -> Dropout. The max-pools only shrink the last axis (by 8, 4 and 2), so the
    length of axis 1 of the input is preserved and becomes the GRU sequence
    axis.

    Args:
        classes_num: Size of the final linear layer's output.
        cnn_filters: Number of output channels of every conv layer.
        rnn_hid: Hidden size of the GRU (per direction).
        _dropout_rate: Dropout probability used after each conv block.
        input_bins: Size of the last axis of the input. Used to derive the GRU
            input size; the default of 192 reproduces the previously hard-coded
            ``3 * cnn_filters``.

    Raises:
        ValueError: If ``input_bins`` is smaller than the total pooling factor
            (64), which would leave nothing on the last axis after pooling.
    """

    # Combined shrink of the last axis by the (1, 8), (1, 4) and (1, 2) pools.
    _POOL_FACTOR = 8 * 4 * 2

    def __init__(self, classes_num, cnn_filters, rnn_hid, _dropout_rate, input_bins=192):
        super().__init__()
        if input_bins < self._POOL_FACTOR:
            raise ValueError(
                f"input_bins must be >= {self._POOL_FACTOR}, got {input_bins}"
            )

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=cnn_filters, kernel_size=(3, 3), padding='same')
        self.batch_norm1 = nn.BatchNorm2d(num_features=cnn_filters)

        self.conv2 = nn.Conv2d(in_channels=cnn_filters, out_channels=cnn_filters, kernel_size=(3, 3), padding='same')
        self.batch_norm2 = nn.BatchNorm2d(num_features=cnn_filters)

        self.conv3 = nn.Conv2d(in_channels=cnn_filters, out_channels=cnn_filters, kernel_size=(3, 3), padding='same')
        self.batch_norm3 = nn.BatchNorm2d(num_features=cnn_filters)

        self.pool1 = nn.MaxPool2d(kernel_size=(1, 8))
        self.pool2 = nn.MaxPool2d(kernel_size=(1, 4))
        self.pool3 = nn.MaxPool2d(kernel_size=(1, 2))

        self.dropout = nn.Dropout(_dropout_rate)

        # After pooling, the last axis holds input_bins // 64 values per
        # channel; forward() flattens channels and that axis together.
        pooled_bins = input_bins // self._POOL_FACTOR
        self.gru1 = nn.GRU(cnn_filters * pooled_bins, rnn_hid, bidirectional=True, batch_first=True)

        # The bidirectional GRU emits 2 * rnn_hid features per step.
        self.linear1 = nn.Linear(rnn_hid * 2, rnn_hid)
        # NOTE(review): there is no activation between linear1 and linear2, so
        # together they are equivalent to a single linear map -- confirm intent.
        self.linear2 = nn.Linear(rnn_hid, classes_num)

    def _conv_block(self, x, conv, norm, pool):
        """Apply conv -> batch norm -> ReLU -> max-pool -> dropout to ``x``."""
        x = torch.relu(norm(conv(x)))
        return self.dropout(pool(x))

    def forward(self, input):
        """Run the network on a batch.

        Args:
            input: 3-D tensor ``(batch, steps, bins)``; presumably time frames
                by frequency/mel bins -- confirm against the data pipeline.

        Returns:
            Tensor of shape ``(batch, steps, classes_num)`` holding the raw
            outputs of the last linear layer (no activation is applied).
        """
        # Add a singleton channel axis: (batch, 1, steps, bins).
        x = input[:, None, :, :]

        x = self._conv_block(x, self.conv1, self.batch_norm1, self.pool1)
        x = self._conv_block(x, self.conv2, self.batch_norm2, self.pool2)
        x = self._conv_block(x, self.conv3, self.batch_norm3, self.pool3)

        # (batch, channels, steps, bins') -> (batch, steps, channels * bins')
        x = x.permute(0, 2, 1, 3)
        x = x.reshape((x.shape[0], x.shape[1], -1))

        # Bidirectional layer
        recurrent, _ = self.gru1(x)
        x = self.linear1(recurrent)
        x = self.linear2(x)

        return x

In [13]:
# Build the network from the notebook-level hyperparameters, then print its
# layer summary.
model = my_CRNN(
    classes_num=no_classes,
    cnn_filters=cnn_filter,
    rnn_hid=gru_hidden_layers,
    _dropout_rate=dropout,
)
print(model)

my_CRNN(
  (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (batch_norm1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (batch_norm2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (batch_norm3): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=(1, 8), stride=(1, 8), padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0, dilation=1, ceil_mode=False)
  (pool3): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.3, inplace=False)
  (gru1): GRU(9, 64, batch_first=True, bidirectional=True)
  (linear1): Linear(in_features=128, out_features=64, bias=True)
  (linear2): Linear(in_featur

In [14]:
## Load and normalize data

In [None]:
## Train

In [15]:
## Evaluate