In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import json
import os

import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader
import torchinfo

from pitch_tracker.utils import dataset, files
from pitch_tracker.utils.constants import (F_MIN, HOP_LENGTH, N_CLASS, N_FFT,
                                           N_MELS, PICKING_FRAME_SIZE,
                                           PICKING_FRAME_STEP,
                                           PICKING_FRAME_TIME, SAMPLE_RATE,
                                           STEP_FRAME, STEP_TIME, WIN_LENGTH)
from pitch_tracker.utils.dataset import AudioDataset
from pitch_tracker.ml.net import create_conv2d_block
from pitch_tracker.ml.train_model import train_model, train, test

In [4]:
device = "cuda" if torch.cuda.is_available() \
    else "mps" if torch.backends.mps.is_available() \
    else "cpu"

print(f"Using {device} device")

Using mps device


In [5]:
stft_hop_size = 512
step_frame = 5
onset_frame_time = stft_hop_size*step_frame/SAMPLE_RATE
pick_frame_time = PICKING_FRAME_SIZE * onset_frame_time

DATASET_DIR = f'../content/pickled_database/{stft_hop_size}_{step_frame}/'
DATA_SPLIT_PATH = dataset.DATA_SPLIT_PATH

train_df, validation_df, test_df = dataset.split_dataset_df('thesis', pickled_data_dir=DATASET_DIR)
train_set, validation_set, test_set = train_df['pickled_path'], validation_df['pickled_path'], test_df['pickled_path']


Missing tracks: 0 []
Missing tracks: 0 []
train_set: 54
validation_set: 27
test_set: 27


In [6]:
train_dataset = AudioDataset(train_set)
validation_dataset = AudioDataset(validation_set)
test_dataset = AudioDataset(test_set)

# affect GPU dedicated memory
batch_size = 4

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [21]:
class Test_Model(nn.Module):
    def __init__(self):
        super(Test_Model, self).__init__()
        self.conv2d_block1 = create_conv2d_block(
            conv2d_input=(1,64,3),
            padding='same',
            maxpool_kernel_size=None,
        )
        
        self.conv2d_block2 = create_conv2d_block(
            conv2d_input=(64,64,3),
            padding='same',
            maxpool_kernel_size=(1,5),
        )

        self.conv2d_block3 = create_conv2d_block(
            conv2d_input=(64,64,3),
            padding='same',
            maxpool_kernel_size=(1,5),
        )
        
        self.conv2d_block4 = create_conv2d_block(
            conv2d_input=(64,210,3),
            padding='same',
            # maxpool_kernel_size=(1,5),
        )

        self.flatten_layer = nn.Flatten(start_dim=2)
        self.gru = nn.GRU(
            input_size=3150,
            hidden_size=88,
            batch_first=True,
            dropout=0.2
        )
        self.dense_layer = nn.Linear(88,88)
        # self.output_layer = nn.Linear(128, 88)
        
    def forward(self, x):
        x = self.conv2d_block1(x)
        x = self.conv2d_block2(x)
        x = self.conv2d_block3(x)
        x = self.conv2d_block4(x)
        flat = self.flatten_layer(x)
        sequence, h_n = self.gru(flat)
        out = self.dense_layer(sequence)
        # x = self.output_layer(x)
        return out
            



In [22]:
model = Test_Model().to('cpu')
dummy_in_shape = [1] + list(train_dataset.__getitem__(0)[0].shape)
dummy_in = torch.randn(dummy_in_shape)
print(f'Input size: {tuple(dummy_in.shape)}')
print(f'Output size: {tuple(model(dummy_in).shape)}')
# del dummy_in

Input size: (1, 1, 1050, 88)
Output size: (1, 210, 88)


In [23]:
torchinfo.summary(model, input_size=dummy_in_shape, device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
Test_Model                               [1, 210, 88]              --
├─Sequential: 1-1                        [1, 64, 1050, 88]         --
│    └─Conv2d: 2-1                       [1, 64, 1050, 88]         640
│    └─ReLU: 2-2                         [1, 64, 1050, 88]         --
│    └─BatchNorm2d: 2-3                  [1, 64, 1050, 88]         128
├─Sequential: 1-2                        [1, 64, 1050, 17]         --
│    └─Conv2d: 2-4                       [1, 64, 1050, 88]         36,928
│    └─ReLU: 2-5                         [1, 64, 1050, 88]         --
│    └─BatchNorm2d: 2-6                  [1, 64, 1050, 88]         128
│    └─MaxPool2d: 2-7                    [1, 64, 1050, 17]         --
├─Sequential: 1-3                        [1, 64, 1050, 3]          --
│    └─Conv2d: 2-8                       [1, 64, 1050, 17]         36,928
│    └─ReLU: 2-9                         [1, 64, 1050, 17]         --
│   

In [24]:
model = Test_Model().to(device)
# loss_fn = nn.BCELoss()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# print(model)

In [25]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(model, validation_dataloader,loss_fn, optimizer, device)
    test(model, test_dataloader, loss_fn, device)
print("Done!")

Epoch 1
-------------------------------


  batch_correct = torch.nonzero(pos_neg_arr).numel()


[    1/  191]  Batch Accuracy: 0.0%, current loss:     nan


KeyboardInterrupt: 

In [64]:
test(model, test_dataloader, loss_fn, device)


Test Error: 
 Accuracy: 3.6%, Avg loss: 0.300722 



In [26]:
X, (y1,y2,y3) = next(iter(train_dataloader))
X = X.to(device)
y3 = y3.to(device)
y_pred = model(X)

In [27]:
y_pred

tensor([[[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [na