### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torch.utils.data as data
import matplotlib.pyplot as plt
import IPython.display
from IPython.display import Audio
import torch.optim as optim
from types import SimpleNamespace
import scipy.signal as sc
import time
from sklearn.metrics import f1_score
import pickle

from trainDataset import TrainDataset
from testDataset import TestDataset
#from trainDatasetNew import TrainDatasetNew
#from testDatasetNew import TestDatasetNew
from validation_split import get_dataloaders
from math_utils import logMagStft, ffts
from SpectrogramCNN import SpectrogramCNN
from train_utils import train, test, save_output, save_geometric_mean_predictions
from evaluation_utils import get_mean_F1, get_loss
from MulitScale1DResNet import MSResNet
from LSTM import LSTM
from FFTMulitScale1DResNet import FFTMSResNet
from SpectralResNet import SpectralResNet18, SpectralResNet34
from scipy.signal import hilbert
from FeatureFNN import FeatureFNN
from MelRawCombined import CNN1D, CNN2D

### Parameters

In [2]:
# --- Run switches ---------------------------------------------------------
validation_split = .1   # handed to get_dataloaders as `validation_split`
do_plots = False        # gates every exploration / plotting cell below
load_model = True       # gates the torch.load call in the "Load Model" section

# --- Hyper-parameters -----------------------------------------------------
# `net` is the model class that the "Main" cells instantiate.
# Candidates: SpectrogramCNN, MSResNet, SpectralResNet34, FFTMSResNet, FeatureFNN, CNN1D, CNN2D
# NOTE(review): `seed` and `test_batch_size` are not read by any cell of this
# notebook; presumably the train_utils helpers consume them via `args` — confirm.
args = SimpleNamespace(
    batch_size=4,
    test_batch_size=4,
    epochs=30,
    lr=0.0001,
    momentum=0.9,
    seed=1,
    log_interval=200,
    weight_decay=0.0001,
    net=SpectralResNet34,
)

# --- Device and machine-dependent data locations ---------------------------
# Adapt these paths when running on another machine.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('with cuda')
    path_train = './../data/kaggle-train/'
    path_test =  './../data/kaggle-test/'
else:
    device = torch.device("cpu")
    print('no cuda')
    path_train = './../data/train-small/'
    path_test =  './../data/test/kaggle-test/'

# --- Output locations -------------------------------------------------------
path_model = 'models/model.pt'
path_submission = 'submissions/'

# --- Data constants ---------------------------------------------------------
sample_rate = 16000
nmbr_classes = 10

with cuda


### Original Dataset

In [3]:
# TODO: extend the dataset classes so they also provide the features and the FFT data

# Divide every sample by the largest int16 value (32767).
# NOTE(review): presumably the clips are stored as 16-bit PCM, which would put
# the scaled values in roughly [-1, 1] — confirm in TrainDataset / TestDataset.
toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)

# Both datasets are built with the same scaling transform.
trainDataset = TrainDataset(path_train, transform=toFloat)
print(len(trainDataset))

testDataset = TestDataset(path_test, transform=toFloat)
print(len(testDataset))

269776
4096


In [4]:
# Length of the first element of the first training item.
# NOTE(review): presumably the number of audio samples per clip — confirm.
input_size = len(trainDataset[0][0])
print('input size: ',input_size)

input size:  64000


### Look at Original Data

In [5]:
if do_plots:
    # How many training samples are there per instrument label?
    # 20 slots are allocated up front and indexed by each item's label
    # (element 1 of the item); this walks the entire training set.
    dummy_count = np.zeros(20)
    for item in trainDataset:
        dummy_count[item[1]] += 1

    # Keep only the slots that were actually hit.
    labels_count = [count for count in dummy_count if count != 0]

    print(labels_count)

In [6]:
if do_plots:
    # Overrides the nmbr_classes set in the parameter cell with the number of
    # labels that actually occur. Depends on labels_count, which the previous
    # cell only builds when do_plots is True.
    nmbr_classes = len(labels_count)
    print('nmbr_classes: ', nmbr_classes)

In [7]:
if do_plots:
    # Per-label sample counts drawn as star markers. The x position is the
    # index into labels_count; empty label slots were dropped beforehand, so it
    # need not equal the original label index.
    plt.plot(labels_count, '*')

In [8]:
if do_plots:
    # Collect the first training item encountered for every label so that one
    # example per class can be plotted and played back below.
    done = np.zeros(nmbr_classes)   # 1 marks labels that already have an example
    examples = []

    for sample in trainDataset:
        label = sample[1]
        if done[label] != 0:
            continue
        examples.append(sample)
        done[label] = 1

In [9]:
# Enlarge the default matplotlib figure size (width, height in inches) for the plots below.
plt.rcParams["figure.figsize"] = (20,10)

In [10]:
# time series

if do_plots:
    # One panel per collected example on a 4x3 grid; subplot numbers are 1-based.
    for panel, example in enumerate(examples, start=1):
        plt.subplot(4, 3, panel)
        plt.plot(example[0])

In [11]:
# envelope

if do_plots:

    # Max-pooling over windows of 50 values thins the curve out before plotting.
    m = nn.MaxPool1d(50)

    for panel, example in enumerate(examples, start=1):
        plt.subplot(4, 3, panel)
        # Magnitude of the analytic signal (Hilbert transform), reshaped to
        # (1, 1, length) so that MaxPool1d accepts it.
        magnitude = np.abs(hilbert(example[0]))
        env = m(torch.from_numpy(magnitude).view(1, 1, -1))
        plt.plot(env[0, 0].numpy())

    plt.show()

In [12]:
# fft

if do_plots:

    # Max-pooling over windows of 50 values thins the curve out before plotting.
    m = nn.MaxPool1d(50)

    for panel, example in enumerate(examples, start=1):
        plt.subplot(4, 3, panel)
        # Result of the project helper `ffts`, reshaped to (1, 1, length) for MaxPool1d.
        fft = m(torch.from_numpy(ffts(example[0])).view(1, 1, -1))
        plt.plot(fft[0, 0].numpy())

    plt.show()

In [13]:
# melspec

if do_plots:
    # NOTE(review): by its name, logMagStft returns a log-magnitude STFT — confirm
    # in math_utils whether any mel scaling is applied, as the cell title suggests.
    n_fft = 510
    for i in range(len(examples)):
        plt.subplot(4,3,i+1)
        # Use the configured sample_rate instead of a second hard-coded 16000 so
        # the plot cannot drift from the parameter cell.
        spectrogram = logMagStft(examples[i][0], sample_rate, n_fft)
        plt.imshow(spectrogram)

    plt.show()

In [14]:
if do_plots:

    # Render one inline audio player per collected example clip.
    for example in examples:
        player = Audio(example[0], rate=sample_rate)
        display(player)

### Dataloaders

In [15]:
# validation split is done here

# `indices` is returned alongside the loaders and pickled in the next cell.
# An `indices = indices[0]` argument used to be passed here as well (now
# disabled) — presumably to reuse a previously saved split; TODO confirm.
train_loader, validation_loader, indices = get_dataloaders(
    trainDataset,
    batch_size=args.batch_size,
    validation_split=validation_split,
    shuffle_dataset=True,
    random_seed=None,
)

# Sanity check: for the first batch of each loader print the tensor shapes, the
# raw targets, the value range and the decoded instrument-family names.
for loader in (train_loader, validation_loader):
    for samples, instrument_family_target in loader:
        targets = instrument_family_target.data
        print(samples.shape, instrument_family_target.shape, targets)
        print(torch.min(samples), torch.max(samples))
        print(trainDataset.transformInstrumentsFamilyToString(targets))
        break

torch.Size([4, 64000]) torch.Size([4]) tensor([3, 4, 5, 0])
tensor(-0.9380, dtype=torch.float64) tensor(0.9464, dtype=torch.float64)
['guitar' 'keyboard' 'mallet' 'bass']
torch.Size([4, 64000]) torch.Size([4]) tensor([2, 8, 9, 0])
tensor(-0.9466, dtype=torch.float64) tensor(0.9466, dtype=torch.float64)
['flute' 'string' 'vocal' 'bass']


In [16]:
# save indices
# The indices returned by get_dataloaders are wrapped in a one-element list
# before pickling, so a reader gets them back as loaded[0].
# NOTE(review): the file has a .txt name but holds binary pickle data.
with open(path_submission + "indices.txt", "wb") as fp:
    pickle.dump([indices], fp)

In [17]:
# The test loader must NOT shuffle.
# NOTE(review): presumably the saved predictions are matched to the test files
# by position — confirm in train_utils.
test_loader = data.DataLoader(
    testDataset,
    batch_size=args.batch_size,
    shuffle=False,
)

# Sanity check on the first test batch: shape and value range of its first element.
for samples in test_loader:
    first = samples[0]
    print(first.shape)
    print(torch.min(first), torch.max(first))
    break

torch.Size([4, 64000])
tensor(-0.8568, dtype=torch.float64) tensor(0.9467, dtype=torch.float64)


### Main

In [18]:
# Architectures whose constructor is called with the target device only.
if args.net in (SpectrogramCNN, SpectralResNet18, FeatureFNN,
                CNN1D, CNN2D, SpectralResNet34):
    model = args.net(device).to(device)

In [19]:
# The multi-scale 1D ResNets are constructed with a leading 1 in addition to
# the device.
# NOTE(review): presumably the number of input channels — confirm in the model modules.
if args.net in (MSResNet, FFTMSResNet):
    model = args.net(1, device).to(device)

In [20]:
# The LSTM takes its layer sizes explicitly.
# NOTE(review): num_classes is hard-coded to 10 here rather than read from
# nmbr_classes — keep the two in sync.
if args.net == LSTM:
    lstm = args.net(
        device,
        input_size=252,
        hidden_size=512,
        num_layers=1,
        num_classes=10,
    )
    model = lstm.to(device)

In [21]:
# Print the module tree of the selected network. `model` is only assigned
# inside the branches of the construction cells above (or by the load cell), so
# this raises NameError when none of them has run successfully.
print(model)

SpectralResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1,

In [22]:
# Main
# SGD with momentum and weight decay, configured from `args`.
# (Alternative that was tried: optim.Adam(model.parameters(), lr=args.lr).)
optimizer = optim.SGD(model.parameters(), lr= args.lr, momentum= args.momentum, weight_decay= args.weight_decay)

# Book-keeping for the best checkpoint seen during training.
# - 'lowest loss' starts at +inf so the first evaluated epoch always counts as
#   an improvement, whatever scale the validation loss has.
# - 'saved epoch' starts at None so the summary never names an epoch that was
#   not actually saved.
info = {'highest F1' : 0,
        'lowest loss' : float('inf'),
        'saved epoch' : None}

In [13]:
# Training loop: train one epoch, evaluate on the validation loader, and keep
# the checkpoint (plus test-set predictions) of the epoch with the lowest
# validation loss.
for epoch in range(1, args.epochs + 1):

    # Step decay: divide every parameter group's learning rate by 10 at the
    # start of each 10th epoch (10, 20, 30, ...).
    if epoch % 10 == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] / 10
            print('New Learning Rate: ', param_group['lr'])

    train(args, model, device, train_loader, optimizer, epoch, start_time = time.time())
    loss_epoch, mean_f1 = get_loss(model, validation_loader, device)
    print('after epoch {} got loss of {} and f1 of {}'.format(epoch , loss_epoch, mean_f1))

    # Model selection is driven by validation loss (an earlier variant selected
    # on the mean F1 from get_mean_F1 instead).
    if loss_epoch < info['lowest loss']:
        # Store plain floats: np.copy on a scalar yields a 0-d array, which
        # clutters the printed summary with array(...) wrappers.
        info['lowest loss'] = float(loss_epoch)
        # Despite the key name, this is the F1 at the lowest-loss epoch, not
        # necessarily the highest F1 observed.
        info['highest F1'] = float(mean_f1)
        info['saved epoch'] = epoch
        test(args, model, device, test_loader, epoch, trainDataset, testDataset, path_submission)
        # Saves the whole pickled module; the load cell reads it back with torch.load.
        torch.save(model, path_model)
        print('currently best model --> saved')

print(info)



after epoch 11 got f1 score of 0.9186310847541796


after epoch 12 got f1 score of 0.9096182357302074


after epoch 13 got f1 score of 0.9740363232024414
saved predictions
currently best model --> saved




after epoch 14 got f1 score of 0.9323573017050111


after epoch 15 got f1 score of 0.9810415122313564
saved predictions
currently best model --> saved


after epoch 16 got f1 score of 0.9047998517420687




after epoch 17 got f1 score of 0.9241104521868475


after epoch 18 got f1 score of 0.9511489992587657


after epoch 19 got f1 score of 0.8923091178651164


after epoch 20 got f1 score of 0.9582344946874819
{'highest F1': array(0.98104151), 'saved epoch': 15}


### save or load outputs

In [26]:
# Run train_utils.save_output for the model currently in memory.
# NOTE(review): presumably this writes the raw test-set outputs to
# path_submission under a name containing the tag (the next cell reads
# 'output-MSResNet-score701.txt') — confirm in train_utils.
# NOTE(review): the tag 'MSResNet-score701' is hard-coded and independent of
# args.net — make sure it matches the model that is actually loaded.
save_output(args, model, device, test_loader, 'MSResNet-score701', 
            trainDataset, testDataset, path_save = path_submission)

saved outputs


In [27]:
# Combine two previously saved output files into one set of predictions.
# NOTE(review): by its name the helper takes the geometric mean of the two
# models' outputs — confirm in train_utils.
# Both input files are expected in path_submission; the first has the name
# pattern of the save_output call in the previous cell.
save_geometric_mean_predictions(path_submission + 'output-MSResNet-score701.txt', 
                                path_submission + 'output-SpectralResNet-score756.txt', 
                                path_submission, 
                                trainDataset, testDataset)

saved predictions


### Load Model

In [3]:
if load_model:
    # Restore a previously saved model (the training loop saves the whole
    # module with torch.save, so torch.load returns a ready-to-use model).
    # To reload the checkpoint written by the training loop, load `path_model` instead.
    # map_location=device remaps the stored tensors onto the device selected in
    # the parameter cell, so a checkpoint saved on a CUDA machine also loads on
    # a CPU-only machine.
    # torch.load unpickles the file — only load checkpoints from a trusted source.
    model = torch.load('models/MSResNet_10epoch_score701.pt', map_location=device)

In [4]:
# load indices
# NOTE(review): this reads 'indices_MSResNet_score701.pt', not the
# 'indices.txt' written by the save cell. If the file was produced the same
# way, the result is a one-element list wrapping the indices — confirm.
# pickle.load can execute arbitrary code while unpickling — only open files
# you created yourself.
with open(path_submission + "indices_MSResNet_score701.pt", "rb") as fp:
    indices = pickle.load(fp)

In [23]:
#get_mean_F1(model, validation_loader)
# Evaluate the model on the validation loader; the training loop unpacks the
# return value as (loss, mean F1).
# NOTE(review): the recorded loss of about 2.31 is close to ln(10) = 2.30,
# which would be chance level for 10 classes if the loss is a cross-entropy —
# worth checking that the loaded model matches this data pipeline.
get_loss(model, validation_loader,device)

(2.308869604184242, 0.03617494440326128)

In [None]:
# Run test() once for the model currently in memory. `epoch` is set by hand to
# 10 here rather than coming from the training loop.
# NOTE(review): presumably test() uses `epoch` only to label the saved
# predictions — confirm in train_utils.
epoch=10
test(args, model, device, test_loader, epoch, trainDataset, testDataset, path_submission)