In [3]:
import itertools
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Put the parent of the current working directory at the front of the import
# path (at most once, even if this cell is re-run). The project imports that
# follow presumably resolve against it -- confirm the repository layout.
PROJECT_DIR = os.path.split(os.getcwd())[0]
if sys.path.count(PROJECT_DIR) == 0:
    sys.path.insert(0, PROJECT_DIR)

from chord_recognition.cache import HDF5Cache
from chord_recognition.models import deep_auditory_v2
from chord_recognition.dataset import ChromaDataset, prepare_datasource, undersample_dataset
from chord_recognition.utils import standardize, one_hot
from chord_recognition.train import get_weighted_random_sampler, Solver
from chord_recognition.ann_utils import convert_annotation_matrix, get_chord_labels
from chord_recognition.evaluate import plot_confusion_matrix
from chord_recognition.predict import forward


torch.manual_seed(2020)
RANDOM_STATE = 42

%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 5)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# HDF5-backed cache stored in PROJECT_DIR; it is handed to every ChromaDataset
# built below (presumably holds computed chroma features -- confirm in
# chord_recognition.cache).
CACHE_PATH = os.path.join(PROJECT_DIR, 'chroma_cache.hdf5')
cache = HDF5Cache(CACHE_PATH)

In [None]:
# Exp3
# Balance datasets separately and concat them into single dataset.

# Framing parameters shared by every dataset in this experiment.
WINDOW_SIZE = 8192
HOP_LENGTH = 4096


def load_chroma_dataset(name):
    """Build the ChromaDataset for a single named datasource.

    Args:
        name: datasource identifier accepted by ``prepare_datasource``
            (e.g. ``'beatles'``).

    Returns:
        A ``ChromaDataset`` created with the shared WINDOW_SIZE / HOP_LENGTH
        settings and the notebook-level ``cache``.
    """
    datasource = prepare_datasource((name,))
    return ChromaDataset(
        datasource, window_size=WINDOW_SIZE, hop_length=HOP_LENGTH,
        cache=cache)


# Per-dataset sampling strategies, passed unchanged to undersample_dataset
# (presumably {class index: number of samples to keep} -- confirm in
# chord_recognition.dataset).
SAMPLING_STRATEGIES = {
    'beatles': {
        0: 8000,
        2: 8000,
        4: 8000,
        5: 8000,
        7: 8000,
        9: 8000,
        11: 8000,
        24: 8000,
    },
    'robbie_williams': {
        0: 8000,
        2: 8000,
        5: 8000,
        7: 8000,
        9: 8000,
        24: 5000,
    },
    'queen': {
        2: 4500,
    },
}

# Balance beatles dataset
beatles_X, beatles_y = undersample_dataset(
    load_chroma_dataset('beatles'),
    SAMPLING_STRATEGIES['beatles'],
    RANDOM_STATE)

# Balance robbie_williams dataset
robbie_williams_X, robbie_williams_y = undersample_dataset(
    load_chroma_dataset('robbie_williams'),
    SAMPLING_STRATEGIES['robbie_williams'],
    RANDOM_STATE)

# Balance queen dataset
queen_X, queen_y = undersample_dataset(
    load_chroma_dataset('queen'),
    SAMPLING_STRATEGIES['queen'],
    RANDOM_STATE)

# Get zweieck data (used in full, no undersampling).
# Collect samples and labels in a single pass instead of iterating the
# dataset twice.
zweieck_X, zweieck_y = [], []
for xi, yi in load_chroma_dataset('zweieck'):
    zweieck_X.append(xi)
    zweieck_y.append(yi)

In [None]:
# Concat all datasets.
# Each source is included exactly once: chaining the same (X, y) pairs twice
# duplicates every sample, and a random stratified split can then place
# identical samples in both the train and the validation set.
# The result is materialised as a list (of references, no data is copied) so
# that later cells can be re-run without finding an exhausted iterator.
dataset = list(itertools.chain(
    zip(beatles_X, beatles_y),
    zip(robbie_williams_X, robbie_williams_y),
    zip(queen_X, queen_y),
    zip(zweieck_X, zweieck_y),
))

In [None]:
# Split dataset into train/val keeping equal proportions of each class, so
# the train and val sets share the same class distribution.
samples = list(dataset)
if not samples:
    # `dataset` may be a one-shot iterator that an earlier run of this cell
    # already consumed; fail with an actionable message.
    raise ValueError(
        "dataset is empty or already consumed; re-run the concat cell first")

X = [s for s, _ in samples]
targets = [t for _, t in samples]

# stratify=targets keeps per-class proportions; random_state makes the split
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, targets, test_size=0.2, stratify=targets, random_state=RANDOM_STATE)

In [None]:
# Calculate TRAIN_MEAN, TRAIN_STD from the training split only: every sample
# has its leading axis squeezed away, the results are stacked along axis 1,
# and the statistics are reduced over that axis into column vectors.
train_stack = np.hstack([x.squeeze(0) for x in X_train])
TRAIN_MEAN = train_stack.mean(axis=1).reshape(-1, 1)
TRAIN_STD = train_stack.std(axis=1).reshape(-1, 1)
del train_stack

# Rescale train and val inputs with the *training* statistics and pair each
# rescaled sample with its label.
train_data = list(zip(
    (standardize(x, TRAIN_MEAN, TRAIN_STD) for x in X_train),
    y_train))
val_data = list(zip(
    (standardize(x, TRAIN_MEAN, TRAIN_STD) for x in X_val),
    y_val))

# The raw (unscaled) splits are no longer needed.
del X_train, X_val

In [None]:
# Balance the classes in each batch which hopefully helps the training.
# NOTE(review): the first argument is the label list of the whole dataset
# (train + val) while the second holds the training labels only -- confirm
# this matches the signature of get_weighted_random_sampler in
# chord_recognition.train.
sampler = get_weighted_random_sampler(targets, y_train)

In [None]:
batch_size = 512

# Wrap the already-split train/val data in DataLoaders. Training batches are
# drawn through the weighted sampler; validation batches are read in order.
dataloaders = {
    "train": DataLoader(
        dataset=train_data,
        batch_size=batch_size,
        sampler=sampler,
        pin_memory=True,
        num_workers=0),
    "val": DataLoader(
        dataset=val_data,
        num_workers=0,
        batch_size=batch_size),
}

# Keep the individual loaders addressable by name as well.
loader_train = dataloaders["train"]
loader_val = dataloaders["val"]

In [None]:
# Training hyper-parameters.
learning_rate = 1e-3
epochs = 128

# Fresh (non-pretrained) network, moved to the GPU when one is present.
model = deep_auditory_v2()
if torch.cuda.is_available():
    model.cuda()

optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999))

# Solver comes from chord_recognition.train; it receives the model, the
# optimizer, both dataloaders and the file name for the trained weights.
solver = Solver(
    model=model,
    optimizer=optimizer,
    dataloaders=dataloaders,
    learning_rate=learning_rate,
    trained_model_name="deep_auditory_v2_exp3.pth",
    epochs=epochs)
solver.train()

In [2]:
# Evaluate on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): this loads whatever weights `pretrained=True` resolves to,
# not necessarily the model trained in the cell above -- confirm in
# chord_recognition.models.
model = deep_auditory_v2(pretrained=True)
model.eval()
model = model.to(device)

In [4]:
# Print shape, mean, std, min and max of every parameter tensor.
for param in model.parameters():
    # Tensor.numpy() raises on CUDA tensors, and the model is moved to the
    # GPU whenever one is available, so copy to host memory first.
    # detach() replaces the legacy `.data` access.
    values = param.detach().cpu()
    print(
        param.shape,
        values.mean().numpy(),
        values.std().numpy(),
        values.min().numpy(),
        values.max().numpy())

torch.Size([32, 1, 3, 3]) 0.01195588 0.23041508 -0.44311804 0.4832016
torch.Size([32]) 1.000668 0.13506913 0.7528172 1.2881731
torch.Size([32]) 0.016741447 0.11628908 -0.22354493 0.34997138
torch.Size([32, 32, 3, 3]) -0.00842397 0.121347636 -0.43391904 0.4086566
torch.Size([32]) 1.0101022 0.05380383 0.9211984 1.1595175
torch.Size([32]) 0.015937982 0.09074649 -0.14982596 0.1793945
torch.Size([64, 32, 3, 3]) -0.014991228 0.12530607 -0.5356139 0.4391299
torch.Size([64]) 0.99884516 0.07463706 0.8075169 1.2038639
torch.Size([64]) 0.15452284 0.12747581 -0.17548582 0.415933
torch.Size([64, 64, 3, 3]) -0.017967353 0.12552185 -0.5802904 0.41256332
torch.Size([64]) 1.0080143 0.06129231 0.8353929 1.1587982
torch.Size([64]) -0.054166585 0.14739095 -0.37531173 0.26922706
torch.Size([128, 64, 12, 1]) -0.01969931 0.122990415 -0.7273003 0.6409122
torch.Size([128]) 1.0019767 0.070718214 0.830612 1.2303199
torch.Size([128]) -0.049630634 0.1690484 -0.559656 0.3431738
torch.Size([128, 128, 1, 9]) -0.00958

In [None]:
# Pass only the inputs of each validation batch to forward(); the labels are
# dropped here.
val_loader = (inputs for inputs, _ in loader_val)

# Inference only: run without autograd bookkeeping so no graph is kept alive
# for the predictions. forward() is defined outside this notebook; the
# wrapper is harmless if it already disables gradients itself.
with torch.no_grad():
    y_hat_matrix = forward(model, val_loader, device, 25)

In [None]:
# Convert the predictions to a NumPy array on the host.
# The isinstance-style guard makes the cell safe to re-run: after the first
# run y_hat_matrix is already an ndarray, which has no .cpu() method.
# detach() replaces the legacy `.data` access.
if torch.is_tensor(y_hat_matrix):
    y_hat_matrix = y_hat_matrix.detach().cpu().numpy()

In [None]:
# Slightly taller figure at a fixed dpi for the confusion matrix.
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'figure.dpi': 80,
})

# Label set used for both the matrix rows/columns and the plot axes.
labels = get_chord_labels(nonchord=True)

# Ground truth: one-hot encode the validation targets (25 is the same value
# handed to forward() when the predictions were computed), then convert the
# annotation matrix with the project helper.
y_matrix = one_hot(y_val, 25)
y_true = convert_annotation_matrix(y_matrix)

# Predictions go through the same conversion.
y_pred = convert_annotation_matrix(y_hat_matrix)

cm = confusion_matrix(y_true, y_pred, labels=labels)
plot_confusion_matrix(cm, labels)