In [4]:
import itertools
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Use the parent of the current working directory as the project root and
# put it at the front of the import path (only once), so the
# `chord_recognition` imports below can be resolved.
# NOTE(review): assumes the notebook is started from a direct subdirectory
# of the project root -- confirm.
PROJECT_DIR = os.path.split(os.getcwd())[0]
if sys.path.count(PROJECT_DIR) == 0:
    sys.path.insert(0, PROJECT_DIR)

from chord_recognition.cache import HDF5Cache
from chord_recognition.cnn import deep_auditory_v2
from chord_recognition.dataset import ChromaDataset, prepare_datasource, undersample_dataset
from chord_recognition.utils import standardize
from chord_recognition.train import get_weighted_random_sampler, Solver

torch.manual_seed(2020)
RANDOM_STATE = 42

%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 5)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Recordings left out of the three non-Beatles collections, grouped by the
# collection they are listed under. Dict order is the order in which the
# collections are passed to prepare_datasource.
EXCLUDED_BY_COLLECTION = {
    'zweieck': (
        '09_-_Mr_Morgan',
        '01_-_Spiel_Mir_Eine_Alte_Melodie',
        '11_-_Ich_Kann_Heute_Nicht',
    ),
    'queen': (
        '14 Hammer To Fall',
        '08 Save Me',
    ),
    'robbie_williams': (
        '11-Man Machine',
        '01-Ghosts',
        '11-A Place To Crash',
        '08-Heaven From Here',
        '09-Random Acts Of Kindness',
        '05-South Of The Border',
    ),
}
# Flatten to the single tuple of file names that prepare_datasource receives.
excluded_files = tuple(
    itertools.chain.from_iterable(EXCLUDED_BY_COLLECTION.values()))
ds = prepare_datasource(
    tuple(EXCLUDED_BY_COLLECTION),
    excluded_files=excluded_files)

# For the Beatles collection the selection is an allow-list instead:
# only the recordings named here are requested.
allowed_files = (
    '06-Mr_Moonlight',
    '06-Yellow_Submarine',
    '03-I_m_Only_Sleeping',
    '09-Penny_Lane',
    '12-Wait',
    '11-Do_You_Want_To_Know_A_Secret',
    '12-A_Taste_Of_Honey',
    '04-I_m_Happy_Just_To_Dance_With_You',
    '03-If_I_Fell',
    '10-I_m_Looking_Through_You',
    '09-When_I_m_Sixty-Four',
    '06-Till_There_Was_You',
    '05-Octopus_s_Garden',
    '03-All_My_Loving',
    '05-And_I_Love_Her',
    '02-All_I_ve_Got_To_Do',
    '10-For_No_One',
    '08-Because',
    '06-She_s_Leaving_Home',
    '04-Chains',
    '10-Things_We_Said_Today',
    '09-One_After_909',
    '09-Girl',
    '14-Run_For_Your_Life',
    '04-Oh_Darling',
    '04-Don_t_Bother_Me',
    '06-I_Want_You_She_s_So_Heavy_',
    '06-Tell_Me_Why',
)
beatles_ds = prepare_datasource(('beatles',), allowed_files=allowed_files)

# One combined datasource: the filtered collections followed by the
# Beatles subset.
datasource = ds + beatles_ds

In [6]:
# Build the chroma dataset over all available recordings. The HDF5Cache
# passed in points at `chroma_cache.hdf5` in the project root
# (presumably so features are not recomputed on every run -- confirm).
chroma_cache = HDF5Cache(os.path.join(PROJECT_DIR, 'chroma_cache.hdf5'))
dataset = ChromaDataset(
    datasource,
    window_size=8192,
    hop_length=4096,
    context_size=7,
    cache=chroma_cache,
)

In [7]:
# Materialise the dataset in ONE pass. Iterating it once per list (as two
# separate comprehensions would) doubles the loading work and relies on
# both passes yielding the items in the same order; a single loop keeps
# every sample paired with its own target.
X, targets = [], []
for sample, target in dataset:
    X.append(sample)
    targets.append(target)

# Stratified 80/20 split: train and val keep the same class proportions.
# `indices` is split alongside the data; the resulting index arrays are
# discarded here.
indices = np.arange(len(X))
X_train, X_val, y_train, y_val, _, _ = train_test_split(
    X, targets, indices, test_size=0.2, stratify=targets, random_state=RANDOM_STATE)

print("{0} train samples, {1} val samples".format(len(X_train), len(X_val)))
# The full sample list is no longer needed once the splits exist.
del X

206446 train samples, 51612 val samples


In [8]:
# Calculate TRAIN_MEAN, TRAIN_STD from the training split only.
# Each sample has its leading size-1 axis squeezed away and the samples are
# stacked side by side; mean/std are taken along axis 1 and reshaped into
# column vectors.
X_train_temp = np.hstack([sample.squeeze(0) for sample in X_train])
TRAIN_MEAN = X_train_temp.mean(axis=1).reshape(-1, 1)
TRAIN_STD = X_train_temp.std(axis=1).reshape(-1, 1)

# Rescale inputs with the TRAINING statistics (both splits use the same
# mean/std, so nothing is estimated from the validation data).
train_data = [(standardize(i, TRAIN_MEAN, TRAIN_STD), t) for i, t in zip(X_train, y_train)]
val_data = [(standardize(i, TRAIN_MEAN, TRAIN_STD), t) for i, t in zip(X_val, y_val)]

# Free the raw feature arrays. y_val is deliberately NOT deleted: the
# confusion-matrix cell at the end of the notebook still reads it, and
# deleting it here makes a Restart & Run All fail there with a NameError.
del X_train_temp, X_train, X_val

In [9]:
# Sampler for the training loader, built from the full target list and the
# training targets. Intended to balance the classes within each batch,
# which hopefully helps training.
sampler = get_weighted_random_sampler(targets, y_train)

# Both label lists have served their purpose; release them.
del targets
del y_train

In [10]:
batch_size = 512

# Training batches are drawn through the weighted sampler.
loader_train = DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=sampler,
    num_workers=0,
    pin_memory=True,
)
# Validation uses the loader defaults (no sampler passed).
loader_val = DataLoader(val_data, batch_size=batch_size, num_workers=0)

# Keyed by phase name, the form in which the loaders are handed to Solver.
dataloaders = dict(train=loader_train, val=loader_val)

In [None]:
# Training hyper-parameters.
learning_rate = 1e-3
epochs = 128

# Fresh network, optimised with Adam.
model = deep_auditory_v2()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
)

# Solver receives the model, optimizer and both loaders and runs the
# training when .train() is called.
solver = Solver(
    model=model,
    optimizer=optimizer,
    dataloaders=dataloaders,
    learning_rate=learning_rate,
    epochs=epochs,
)
solver.train()

In [None]:
from sklearn.metrics import confusion_matrix

from chord_recognition.utils import one_hot
from chord_recognition.ann_utils import convert_annotation_matrix
from chord_recognition.evaluate import plot_confusion_matrix
from chord_recognition.predict import forward

# Evaluate on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# NOTE: `model` is rebound here, so the evaluation below runs on
# deep_auditory_v2(pretrained=True), not on the instance trained above.
model = deep_auditory_v2(pretrained=True)
model.eval()
if use_cuda:
    model.cuda()

In [None]:
# Feed only the input half of each validation batch to the model; the
# targets are dropped by the generator.
val_loader = (inputs for inputs, _ in loader_val)
# NOTE(review): 25 is presumably the number of chord classes (the same
# value is passed to one_hot further down) -- confirm.
y_hat_matrix = forward(model, val_loader, device, 25)

In [None]:
# Bring the predictions to host memory as a NumPy array.
# The guard keeps the cell re-runnable: once y_hat_matrix is already an
# ndarray, a second run is a no-op instead of an AttributeError.
# .detach() is used instead of the legacy `.data` attribute to drop
# autograd tracking.
if torch.is_tensor(y_hat_matrix):
    y_hat_matrix = y_hat_matrix.detach().cpu().numpy()

In [None]:
# Slightly taller figure at 80 dpi for the confusion matrix.
plt.rcParams.update({'figure.figsize': (14, 6), 'figure.dpi': 80})

# Ground truth: validation targets -> one-hot matrix (25 columns) ->
# annotation labels.
y_matrix = one_hot(y_val, 25)
y_true = convert_annotation_matrix(y_matrix)

# Predictions go through the same matrix -> label conversion.
y_pred = convert_annotation_matrix(y_hat_matrix)

# Rows/columns of the matrix follow the dataset's chord label order.
labels = dataset.chord_labels
cm = confusion_matrix(y_true, y_pred, labels=labels)

plot_confusion_matrix(cm, labels)