In [5]:
from copy import deepcopy, copy
import os
import pathlib
from random import shuffle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, ConcatDataset
from livelossplot import PlotLosses

from chord_recognition.augmentations import SemitoneShift
from chord_recognition.dataset import ChromaDataset, BatchIterator, flatten_iterator
from chord_recognition.cnn import model

%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 5)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
dataset = []
for ds_name in ('beatles', 'queen', 'robbie_williams'):
    dataset.append(
        ChromaDataset(audio_dir=f'data/{ds_name}/mp3/',
        ann_dir=f'data/{ds_name}/chordlabs/',
        window_size=8192, hop_length=4096)
    )
dataset = ConcatDataset(dataset)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_iter = BatchIterator(dataset, batch_size=32)
shift = SemitoneShift(p=1.0, max_shift=4, bins_per_semitone=2)
train_dataset = flatten_iterator(shift(batch_iter))

loader_train = DataLoader(train_dataset, shuffle=True, num_workers=0, batch_size=32)
loader_val = DataLoader(test_dataset, shuffle=True, num_workers=0, batch_size=32)

dataloaders = {
    "train": loader_train,
    "val": loader_val
}

In [7]:
def save_checkpoint(state_dict, is_best, filename='new_etd_best_model.pt'):
    if is_best:
        torch.save(state_dict, f'chord_recognition/models/{filename}')

In [8]:
def train_model(model, optimizer, dataloaders, device, epochs=1):
    liveloss = PlotLosses()
    model = model.to(device=device)
    
    best_acc = 0
    for e in range(epochs):
        logs = {}
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # put model to training mode
            else:
                model.eval()
                
            running_loss = 0.0
            running_corrects = 0
        
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device=device, dtype=torch.float32)
                labels = labels.to(device=device, dtype=torch.long)
                print(inputs.shape)
                print(labels.shape)
                break
                labels = torch.argmax(labels, 1)
                if phase == 'train':
                    # Zero out all of the gradients for the variables which the optimizer
                    # will update.
                    optimizer.zero_grad()
            
                scores = model(inputs)
                scores = scores.squeeze(3).squeeze(2)                
                loss = F.cross_entropy(scores, labels)
                
                _, preds = torch.max(scores, 1)
                running_corrects += torch.sum(preds == labels)

        
                if phase == 'train':        
                    # This is the backwards pass: compute the gradient of the loss with
                    # respect to each  parameter of the model.
                    loss.backward()
        
                    # Actually update the parameters of the model using the gradients
                    # computed by the backwards pass.
                    optimizer.step()
            
                running_loss += loss.detach() * inputs.size(0)
        
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)
            prefix = ''
            if phase == 'val':
                prefix = 'val_'
                is_best = epoch_acc > best_acc
                best_acc = max(epoch_acc, best_acc)
                save_checkpoint(model.state_dict(), is_best)
            
            logs[prefix + ' log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()
        
        liveloss.update(logs)
        liveloss.send()

In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [10]:
learning_rate = 1e-3
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), weight_decay=1e-7)

train_model(model, optimizer, dataloaders, device, 50)

torch.Size([32, 1, 105, 15])
torch.Size([32, 25])


AttributeError: 'int' object has no attribute 'float'

In [None]:
# Eliminate overfitting:
# - Emulates a slight detuning by shifting the spectrogram by fractions of up to 0.4 of a semitone
# - Try smaller sets of features
# - Try increasing lambda