In [3]:
import json
import os

import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader
import torchinfo

from pitch_tracker.utils import dataset, files
from pitch_tracker.utils.constants import (F_MIN, HOP_LENGTH, N_CLASS, N_FFT,
                                           N_MELS, PICKING_FRAME_SIZE,
                                           PICKING_FRAME_STEP,
                                           PICKING_FRAME_TIME, SAMPLE_RATE,
                                           STEP_FRAME, STEP_TIME, WIN_LENGTH)
from pitch_tracker.utils.dataset import AudioDataset
from pitch_tracker.ml.net import create_conv2d_block
from pitch_tracker.ml.train_model import train_model, train, test

In [4]:
device = "cuda" if torch.cuda.is_available() \
    else "mps" if torch.backends.mps.is_available() \
    else "cpu"

# device = "cpu"

print(f"Using {device} device")

Using mps device


In [7]:
hop_size = 512
step_frame = 2
DATASET_DIR = f'../content/pickled_database/{hop_size}_{step_frame}/'
DATA_SPLIT_PATH = '../pitch_tracker/utils/data_split.json'


with open(DATA_SPLIT_PATH, 'r') as f:
    splits = json.load(f)
train_set = [os.path.join(DATASET_DIR, song_name) for song_name in splits['train']]
validation_set = [os.path.join(DATASET_DIR, song_name) for song_name in splits['validation']]
test_set = [os.path.join(DATASET_DIR, song_name) for song_name in splits['test']]

print(f'train_set: {len(train_set)}')
print(f'validation_set: {len(validation_set)}')
print(f'test_set: {len(test_set)}')

train_set: 66
validation_set: 15
test_set: 27


In [8]:
train_dataset = AudioDataset(train_set)
validation_dataset = AudioDataset(validation_set)
test_dataset = AudioDataset(test_set)

# affect GPU dedicated memory
batch_size = 4

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [11]:
class Test_Model(nn.Module):
    def __init__(self):
        super(Test_Model, self).__init__()
        self.conv2d_block1 = create_conv2d_block(
            conv2d_input=(1,128,5),
            padding='same',
            maxpool_kernel_size=None,
        )
        
        self.conv2d_block2 = create_conv2d_block(
            conv2d_input=(128,64,3),
            padding='same',
            maxpool_kernel_size=(1,5),
        )

        self.conv2d_block3 = create_conv2d_block(
            conv2d_input=(64,64,3),
            padding='same',
            maxpool_kernel_size=(1,5),
        )
        
        # self.conv2d_block4 = create_conv2d_block(
        #     conv2d_input=(64,64,3),
        #     padding='same',
        #     maxpool_kernel_size=(1,5),
        # )

        self.flatten_layer = nn.Flatten(start_dim=2)

        self.gru = nn.GRU(
            input_size=1260,
            hidden_size=88,
            batch_first=True,
            bidirectional=True,
            dropout=0.2,
        )
        
        # self.dense_layer = nn.LazyLinear(88)
        # self.output_layer = nn.Linear(128, 88)
        
    def forward(self, x):
        x = self.conv2d_block1(x)
        x = self.conv2d_block2(x)
        x = self.conv2d_block3(x)
        # x = self.conv2d_block4(x)
        flat = self.flatten_layer(x)
        sequence, h_n = self.gru(flat)
        # out = self.dense_layer(sequence)
        # x = self.output_layer(x)
        return sequence
        

In [12]:
model = Test_Model().to('cpu')
dummy_in_shape = [1] + list(train_dataset.__getitem__(0)[0].shape)
dummy_in = torch.randn(dummy_in_shape)
print(f'Input size: {tuple(dummy_in.shape)}')
print(f'Output size: {tuple(model(dummy_in).shape)}')
# del dummy_in

Input size: (1, 1, 420, 88)
Output size: (1, 64, 176)


In [13]:
torchinfo.summary(model, input_size=dummy_in_shape, device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
Test_Model                               [1, 64, 176]              --
├─Sequential: 1-1                        [1, 128, 420, 88]         --
│    └─Conv2d: 2-1                       [1, 128, 420, 88]         3,328
│    └─ReLU: 2-2                         [1, 128, 420, 88]         --
│    └─BatchNorm2d: 2-3                  [1, 128, 420, 88]         256
├─Sequential: 1-2                        [1, 64, 420, 17]          --
│    └─Conv2d: 2-4                       [1, 64, 420, 88]          73,792
│    └─ReLU: 2-5                         [1, 64, 420, 88]          --
│    └─BatchNorm2d: 2-6                  [1, 64, 420, 88]          128
│    └─MaxPool2d: 2-7                    [1, 64, 420, 17]          --
├─Sequential: 1-3                        [1, 64, 420, 3]           --
│    └─Conv2d: 2-8                       [1, 64, 420, 17]          36,928
│    └─ReLU: 2-9                         [1, 64, 420, 17]          --
│ 

In [14]:
model = Test_Model().to(device)
# loss_fn = nn.BCELoss()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# print(model)



In [6]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_model(model, validation_dataloader,loss_fn, optimizer, device)
    test_model(model, test_dataloader, loss_fn, device)
print("Done!")

Epoch 1
-------------------------------
[    1/  648]  Batch Accuracy: 6.2%, current loss: 13.060298
[   50/  648]  Batch Accuracy: 0.0%, current loss: 641.974011
[   99/  648]  Batch Accuracy: 0.5%, current loss: 1269.609948
[  148/  648]  Batch Accuracy: 0.0%, current loss: 1896.429015
[  197/  648]  Batch Accuracy: 0.0%, current loss: 2522.489054
[  246/  648]  Batch Accuracy: 0.0%, current loss: 3148.550686
[  295/  648]  Batch Accuracy: 1.4%, current loss: 3774.858865
[  344/  648]  Batch Accuracy: 1.4%, current loss: 4400.766545
[  393/  648]  Batch Accuracy: 1.0%, current loss: 5026.630705
[  442/  648]  Batch Accuracy: 0.0%, current loss: 5653.060153
[  491/  648]  Batch Accuracy: 0.0%, current loss: 6279.430711
[  540/  648]  Batch Accuracy: 1.0%, current loss: 6904.975354
[  589/  648]  Batch Accuracy: 0.0%, current loss: 7530.056245
[  638/  648]  Batch Accuracy: 0.0%, current loss: 8155.244347
[  648/  648]  Total Accuracy: 2.9%, Avg loss: 12.781859
Test Error: 
 Accuracy: 

In [64]:
test_model(model, validation_dataloader, loss_fn, device)


Test Error: 
 Accuracy: 3.6%, Avg loss: 0.300722 



In [23]:
X, (y1,y2,y3) = next(iter(train_dataloader))
X = X.to(device)
y3 = y3.to(device)
y_pred = model(X)

In [24]:
y_pred

tensor([[[-0.2972,  0.3336, -0.1609,  ..., -0.0524, -0.0204, -0.8517],
         [-0.1902,  0.0582,  0.1809,  ...,  0.0170,  0.2186, -0.5805],
         [-0.3692, -0.0507,  0.0995,  ..., -0.0526,  0.0991, -0.6397],
         ...,
         [-0.5017, -0.3042, -0.5330,  ..., -0.0360, -0.4683,  0.2623],
         [-0.4445,  0.0660, -0.5317,  ..., -0.0100, -0.4262,  0.6025],
         [-0.2346, -0.5030, -0.2007,  ..., -0.0692,  0.0729, -0.1966]]],
       grad_fn=<ViewBackward0>)

In [25]:
loss = loss_fn(y_pred, y3)

In [27]:
loss.item()

13.041638374328613

In [20]:
y_pred.size()
y3.size()

torch.Size([4, 210, 88])

In [47]:
y_pred.numel()

73920

In [51]:
y_pred.argmax(2).flatten() == y3.argmax(2).flatten()

torch.Size([840])

In [62]:
(y_pred.argmax(2) == y3.argmax(2)).type(torch.float).sum().item()

11.0

In [55]:
pos_neg_matrix = (y_pred.argmax(2) == y3.argmax(2)).flatten()
n_size = pos_neg_matrix.numel()
n_correct = torch.nonzero(pos_neg_matrix).numel()

In [8]:
from datetime import datetime


date_time = datetime.now().strftime('%Y%m%d-%H%M%S')
date_time

'20230310-171903'