### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torch.utils.data as data
import matplotlib.pyplot as plt
import IPython.display
from IPython.display import Audio
import torch.optim as optim
from types import SimpleNamespace
import scipy.signal as sc
import time
from sklearn.metrics import f1_score

from trainDataset import TrainDataset
from testDataset import TestDataset
#from trainDatasetNew import TrainDatasetNew
#from testDatasetNew import TestDatasetNew
from validation_split import get_dataloaders
from math_utils import logMagStft, ffts
from SpectrogramCNN import SpectrogramCNN
from train_utils import train, test
from evaluation_utils import get_mean_F1

### Parameters

In [11]:
# --- Run configuration ---------------------------------------------------
# Fraction of the training data handed to get_dataloaders as validation_split.
validation_split = .2
# Guards the exploratory plotting / audio cells (they iterate the whole dataset).
do_plots = False
# When True, the "Load Model" cell replaces `model` with the checkpoint at path_model.
load_model = True
# Hyper-parameters plus the network class to instantiate (args.net).
args = SimpleNamespace(batch_size=64, test_batch_size=64, epochs=1,
                       lr=0.01, momentum=0.5, seed=1, log_interval=200,
                       net=SpectrogramCNN)

# Apply the configured seed. args.seed used to be defined but never used,
# so results were not repeatable between kernel sessions.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Data locations depend on the machine; adapt those paths on other machines.
if use_cuda:
    print('with cuda')
    path_train = './../data/kaggle-train/'
    path_test = './../data/kaggle-test/'
else:
    print('no cuda')
    path_train = './../data/train-small/'
    path_test = './../data/test/kaggle-test/'

# Where the best checkpoint and the submission files are written.
path_model = 'models/model.pt'
path_submission = 'submissions/'

# Sample rate passed to the Audio player (Hz) and default number of target classes.
sample_rate = 16000
nmbr_classes = 10

with cuda


### Original Dataset

In [19]:
# TODO: add the features and the FFT data to the dataset classes


def _scale_by_int16_max(x):
    """Divide `x` by the largest value representable as int16."""
    return x / np.iinfo(np.int16).max


# Transform applied by both datasets to every sample they return.
toFloat = transforms.Lambda(_scale_by_int16_max)

# Training set; its size is printed as a quick sanity check.
trainDataset = TrainDataset(path_train, transform=toFloat)
print(len(trainDataset))

# Test set, built with the same transform.
testDataset = TestDataset(path_test, transform=toFloat)
print(len(testDataset))

269776
4096


In [20]:
# Length of the first element of the first training item, used as the model input size.
first_item = trainDataset[0]
input_size = len(first_item[0])
print('input size: ', input_size)

input size:  64000


### Look at Original Data

In [None]:
if do_plots:
    # How many instruments are there?
    # Tally the samples per label index (room for 20 label slots).
    dummy_count = np.zeros(20)
    for sample in trainDataset:
        label = sample[1]
        dummy_count[label] += 1

    # Drop the slots that never occurred.
    labels_count = [count for count in dummy_count if count != 0]

    print(labels_count)

In [None]:
if do_plots:
    # Override the configured class count with the number of labels that
    # actually occur in the training data (non-zero entries of the tally).
    nmbr_classes = len(labels_count)
    print('nmbr_classes: ', nmbr_classes)

In [None]:
if do_plots:
    # Samples per occurring label, one star marker per label.
    # Axis labels and a title are added so the figure can be read on its own.
    plt.plot(labels_count, '*')
    plt.xlabel('label (index among labels that occur)')
    plt.ylabel('number of samples')
    plt.title('Training samples per label')
    plt.show()

In [None]:
if do_plots:
    # Collect the first sample encountered for each label ("plot one of each").
    # done[label] flips to 1 once that label has an example.
    done = np.zeros(nmbr_classes)
    examples = []

    for sample in trainDataset:
        if done[sample[1]] == 0:
            examples.append(sample)
            done[sample[1]] = 1
            # Every label is covered once nmbr_classes examples are collected,
            # so stop here instead of reading the rest of the dataset.
            if len(examples) == nmbr_classes:
                break

In [None]:
if do_plots:
    # One waveform panel per collected example on a 3-column grid.
    # The row count is derived from the number of examples, so this works for
    # any number of classes (10 examples give the same 4x3 layout as before).
    n_cols = 3
    n_rows = max(1, -(-len(examples) // n_cols))  # ceiling division
    for position, sample in enumerate(examples, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.plot(sample[0])

    plt.show()

In [None]:
if do_plots:
    # Plot one of each in FFT: one panel per collected example on a 3-column grid,
    # showing ffts() of the example's waveform.
    # The row count is derived from the number of examples, so this works for
    # any number of classes (10 examples give the same 4x3 layout as before).
    n_cols = 3
    n_rows = max(1, -(-len(examples) // n_cols))  # ceiling division
    for position, sample in enumerate(examples, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.plot(ffts(sample[0]))

    plt.show()

In [None]:
if do_plots:
    # Show an inline audio player for each collected example.
    for example in examples:
        player = Audio(example[0], rate=sample_rate)
        IPython.display.display(player)

### Dataloaders

In [23]:
# The validation split is done here.
# The split is seeded with args.seed (it used to be None). With an unseeded
# split, each kernel session gets a different partition, so a checkpoint loaded
# later may be scored on samples it was trained on.
# NOTE(review): assumes get_dataloaders uses random_seed to seed its shuffle - confirm.
train_loader, validation_loader = get_dataloaders(trainDataset,
                                                  batch_size=args.batch_size,
                                                  validation_split=validation_split,
                                                  shuffle_dataset=True,
                                                  random_seed=args.seed)

# Sanity check: for the first batch of each loader, print the shapes, the raw
# targets, the value range of the samples, and the targets as family names.
for loader in (train_loader, validation_loader):
    for samples, instrument_family_target in loader:
        print(samples.shape, instrument_family_target.shape,
              instrument_family_target.data)
        print(torch.min(samples), torch.max(samples))
        print(trainDataset.transformInstrumentsFamilyToString(instrument_family_target.data))
        break

torch.Size([64, 64000]) torch.Size([64]) tensor([0, 4, 3, 7, 3, 0, 6, 6, 0, 6, 2, 6, 4, 0, 4, 5, 0, 5, 4, 7, 8, 4, 7, 4,
        8, 1, 7, 7, 4, 0, 4, 7, 0, 6, 9, 1, 9, 4, 4, 3, 0, 4, 0, 0, 4, 0, 2, 6,
        8, 0, 6, 3, 1, 8, 1, 0, 7, 7, 4, 6, 0, 6, 6, 6])
tensor(-0.9560, dtype=torch.float64) tensor(0.9562, dtype=torch.float64)
['bass' 'keyboard' 'guitar' 'reed' 'guitar' 'bass' 'organ' 'organ' 'bass'
 'organ' 'flute' 'organ' 'keyboard' 'bass' 'keyboard' 'mallet' 'bass'
 'mallet' 'keyboard' 'reed' 'string' 'keyboard' 'reed' 'keyboard' 'string'
 'brass' 'reed' 'reed' 'keyboard' 'bass' 'keyboard' 'reed' 'bass' 'organ'
 'vocal' 'brass' 'vocal' 'keyboard' 'keyboard' 'guitar' 'bass' 'keyboard'
 'bass' 'bass' 'keyboard' 'bass' 'flute' 'organ' 'string' 'bass' 'organ'
 'guitar' 'brass' 'string' 'brass' 'bass' 'reed' 'reed' 'keyboard' 'organ'
 'bass' 'organ' 'organ' 'organ']
torch.Size([64, 64000]) torch.Size([64]) tensor([0, 7, 0, 8, 7, 7, 4, 6, 0, 4, 5, 4, 5, 4, 0, 6, 6, 1, 5, 4, 9, 6, 5, 4,


In [24]:
# Loader over the test set. It now uses args.test_batch_size; previously it used
# args.batch_size and the dedicated test setting was never read (both are 64).
# shuffle must stay False - presumably so predictions keep the order of
# testDataset when the submission is written (confirm against test()).
test_loader = data.DataLoader(testDataset, batch_size=args.test_batch_size, shuffle=False)

# Sanity check: shape and value range of the first test batch.
for samples in test_loader:
    print(samples.shape)
    print(torch.min(samples), torch.max(samples))
    break

torch.Size([64, 64000])
tensor(-0.9723, dtype=torch.float64) tensor(0.9834, dtype=torch.float64)


### Main

In [None]:
# Instantiate the network class configured in args.net, then move it to the device.
model = args.net(device)
model = model.to(device)

In [10]:
# Main
# SGD with momentum over all model parameters; the values come from args.
sgd_settings = {'lr': args.lr, 'momentum': args.momentum}
optimizer = optim.SGD(model.parameters(), **sgd_settings)

# Bookkeeping read and updated by the training loop: the F1 reference value
# (starts at 100) and the epoch at which the model was last saved.
info = {}
info['lowest F1'] = 100
info['saved epoch'] = None

In [12]:
# Train for args.epochs epochs, validating after each one.
# Epoch numbers start after epoch_offset (presumably the epochs already trained
# in an earlier session - confirm).
epoch_offset = 3
for epoch in range(1 + epoch_offset, args.epochs + 1 + epoch_offset):
    train(args, model, device, train_loader, optimizer, epoch, start_time=time.time())
    f1 = get_mean_F1(model, validation_loader)
    print('after epoch {} got f1 score of {}'.format(epoch, f1))

    # F1 is higher-is-better, so checkpoint only when the score improves.
    # The old test `f1 < info['lowest F1']` kept the worst model instead.
    # While 'saved epoch' is None nothing has been saved yet, so the first score
    # always counts; the initial placeholder of 100 could never be beaten by an F1.
    # NOTE: the 'lowest F1' key name is kept for compatibility with the cell that
    # creates `info`; from here on it holds the best (highest) F1 seen so far.
    nothing_saved_yet = info['saved epoch'] is None
    if nothing_saved_yet or f1 > info['lowest F1']:
        info['lowest F1'] = f1
        info['saved epoch'] = epoch
        # Write test-set predictions for this model, then save the model itself.
        test(args, model, device, test_loader, epoch, trainDataset, testDataset, path_submission)
        torch.save(model, path_model)
        print('currently best model --> saved')

after epoch 4 got f1 score of 0.7823792950237007
saved predictions
currently best model --> saved


### Load Model

In [7]:
if load_model:
    # Restore the model object written by torch.save(model, path_model).
    # map_location=device lets a checkpoint saved on a CUDA machine load on a
    # CPU-only one; without it the tensors are restored to their saved device.
    # NOTE(review): torch.load unpickles arbitrary objects - only load trusted files.
    model = torch.load(path_model, map_location=device)

In [17]:
# Score the current `model` on the validation loader; the value is shown as the cell output.
# NOTE(review): the score is only meaningful if validation_loader holds the same
# split the model was validated on during training - confirm the split is seeded.
get_mean_F1(model, validation_loader)

0.7869828692733053

In [25]:
# Run the current `model` on the test loader via test(); the call prints
# 'saved predictions' (presumably written under path_submission - confirm in train_utils).
# The epoch number handed to test() is set by hand here.
epoch = 4
test(args, model, device, test_loader, epoch, trainDataset, testDataset, path_submission)

saved predictions
