In [3]:
from importlib import reload
import pymongo
import gridfs
import numpy as np
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from scipy.sparse import csc_matrix, csr_matrix
import pickle
import pretty_midi
import sys
import copy
from collections import namedtuple
import timeit
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
from IPython.display import clear_output
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [16]:
# Import the project modules and reload each one straight away, so that edits made
# to the files under src/ are picked up without restarting the kernel.
# Re-run this cell after changing any of the modules listed below.
import src.midi_utils as midi_utils
reload(midi_utils)

import src.data as data
reload(data)

import src.models as models
reload(models)

import src.ml_classes as ml_classes
reload(ml_classes)

import src.exp_utils as exp_utils
reload(exp_utils)

import src.losses as losses
# Last expression of the cell: the reloaded module's repr becomes the cell output.
reload(losses)

<module 'src.losses' from '/storage/781-piano-autoencoder/src/losses.py'>

## Examining hyperparameters: plotting metrics from different training runs 

### Testing different MSE weights
344-348
Different MSE weights
Testing different velocity weights (with V MSE). A weight of 10^4 seems the best balance: increasing the velocity weight above that worsens the H metrics a lot, with little gain for V - in fact, validation gets worse beyond that point!

In [33]:
# Loss-weight sweep: Sacred runs 344 through 348.
id_list = list(range(344, 349))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'loss_weights'
# NOTE(review): parameter_is_list / index_of_interest=-1 presumably pick the last
# entry of the list-valued config - confirm in src/data.py.
data.plot_sacred_runs(
    id_list,
    x,
    parameter_is_list=True,
    index_of_interest=-1,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='H_out_loss'), Che…

### Overfitting with Large Network
387
Using hidden state size of 400. 2 bidirectional, 1 unidirectional layers.

In [17]:
# Single large-network run (387); no hyperparameter is being compared here.
id_list = [387]
x = None
data.plot_sacred_training(
    id_list,
    x,
    plot_params={'title': 'Overfitting'},
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

### testing different recurrent dropouts
387, 389-394  
Using hidden state size of 400. 2 bidirectional, 1 unidirectional layers.  
0.3 recurrent dropout clearly does the best.

In [59]:
# Recurrent-dropout sweep: run ids 389-394 plus run 387.
id_list = list(range(389, 395)) + [387]
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'recurrent_dropout'
data.plot_sacred_runs(
    id_list,
    x,
    plot_params={
        'title': 'Recurrent Dropout vs Validation Loss',
        'figsize': (7, 7),
    },
    return_df=True,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,recurrent_dropout,loss,mse,val_loss,val_mse,accuracy,val_accuracy,epochs
387,0.0,0.00088,0.00088,0.006014,0.006014,0.0,0.0,97
389,0.3,0.001448,0.001448,0.005408,0.005408,0.0,0.0,126
391,0.1,0.001142,0.001142,0.005808,0.005808,0.0,0.0,109
392,0.2,0.000866,0.000866,0.005552,0.005552,0.0,0.0,123
393,0.4,0.00176,0.00176,0.005445,0.005445,0.0,0.0,126
394,0.5,0.002395,0.002395,0.005459,0.005459,0.0,0.0,128


In [39]:
# Training curves for the 0.0 (run 387) and 0.3 (run 389) recurrent-dropout runs.
id_list = [387, 389]
x = 'recurrent_dropout'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    plot_params={
        'title': 'Effect of Recurrent Dropout on Training',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

### testing different hidden state sizes
372-379  
3 layer model: testing hidden state size. No big differences, so go with 400. Not much worse than the 4 layer model.

In [62]:
# Hidden-state-size sweep for the 3 layer model: Sacred runs 372 through 379.
id_list = list(range(372, 380))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
data.plot_sacred_runs(id_list, x)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

### testing small models, 0.4 dropout - Old dataset
405-417  
testing small models - 48 state size overfits. 24 is best - finally doesn't overfit.  
Note that 387 is the first run using the old dataset

In [61]:
# Small-model sweep: run ids 405-416 plus run 400.
id_list = list(range(405, 417)) + [400]
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
data.plot_sacred_runs(
    id_list,
    x,
    return_df=True,
    plot_params={
        'title': 'Hidden State Size vs Loss',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,hidden_state,loss,mse,val_loss,val_mse,accuracy,val_accuracy,epochs
400,200,0.002411,0.002411,0.005564,0.005564,0.0,0.0,112
405,2,0.007591,0.007591,0.006967,0.006967,0.0,0.0,820
406,3,0.007151,0.007151,0.006712,0.006712,0.0,0.0,511
407,4,0.006655,0.006655,0.006222,0.006222,0.0,0.0,1447
408,5,0.006475,0.006475,0.006049,0.006049,0.0,0.0,797
409,6,0.006311,0.006311,0.005794,0.005794,0.0,0.0,861
410,7,0.005877,0.005877,0.005739,0.005739,0.0,0.0,1500
414,12,0.005372,0.005372,0.005534,0.005534,0.0,0.0,1158
415,24,0.004857,0.004857,0.005303,0.005303,0.0,0.0,469
416,48,0.003463,0.003463,0.005487,0.005487,0.0,0.0,358


In [55]:
# Training curves for three of the small-model runs (407, 416, 415), epoch_lim=500.
id_list = [407, 416, 415]
x = 'hidden_state'  # hyperparameter of interest
# The result is kept in `df` (return_df=True) rather than displayed by the cell.
df = data.plot_sacred_training(
    id_list,
    x,
    return_df=True,
    plot_params={
        'title': 'Effect of Hidden State Size on Training',
        'figsize': (7, 7),
    },
    epoch_lim=500,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [171]:
# Non-dropout models: Sacred runs 418 through 423.
id_list = list(range(418, 424))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
data.plot_sacred_runs(id_list, x)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

In [172]:
# Pn-only models (run ids 424-431) compared against the PCn/PSn models of runs 418-423.
id_list = list(range(424, 432)) + list(range(418, 424))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
# Runs are split by their 'model_inputs' setting.
data.plot_sacred_runs(
    id_list,
    x,
    split='model_inputs',
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

In [140]:
# Wider version of the Pn-only vs PCn/PSn comparison: every run id from 418 to 440.
id_list = list(range(418, 441))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
# Config key the runs are split by.
split = 'model_inputs'
data.plot_sacred_runs(
    id_list,
    x,
    split=split,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

### testing different inputs
418:424 with beat indicators vs 442:449 without
Actually, use 442:457 - retrained some with beat indicators

In [50]:
# With vs without beat indicators: Sacred runs 442 through 457.
id_list = list(range(442, 458))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
# Config key the runs are split by (passed positionally below).
split = 'model_inputs'
data.plot_sacred_runs(
    id_list,
    x,
    split,
    return_df=True,
    plot_params={
        'title': 'Excluding Rhythmic Information',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,hidden_state,loss,mse,val_loss,val_mse,accuracy,val_accuracy,model_inputs,epochs
442,4,0.00746,0.00746,0.006855,0.006855,0.0,0.0,"['PSn', 'PCn']",852
443,6,0.007326,0.007326,0.006736,0.006736,0.0,0.0,"['PSn', 'PCn']",599
444,8,0.007398,0.007398,0.006685,0.006685,0.0,0.0,"['PSn', 'PCn']",308
445,12,0.006554,0.006554,0.005876,0.005876,0.0,0.0,"['PSn', 'PCn']",692
446,16,0.00608,0.00608,0.005876,0.005876,0.0,0.0,"['PSn', 'PCn']",796
447,20,0.006087,0.006087,0.005947,0.005947,0.0,0.0,"['PSn', 'PCn']",493
448,30,0.005498,0.005498,0.006082,0.006082,0.0,0.0,"['PSn', 'PCn']",361
449,50,0.003774,0.003774,0.006063,0.006063,0.0,0.0,"['PSn', 'PCn']",246
450,4,0.006677,0.006677,0.006223,0.006223,0.0,0.0,"['PSn', 'PCn', 'TBn', 'TSBn']",849
451,6,0.006488,0.006488,0.006135,0.006135,0.0,0.0,"['PSn', 'PCn', 'TBn', 'TSBn']",475


In [179]:
# Hidden state size of 50 (runs 449 and 457): is overfitting the reason the
# models without rhythm information do better?
id_list = [449, 457]
# `x` is assigned for parity with the other cells but is not passed to the call below.
x = 'hidden_state'
# `split` is what goes in as the second positional argument here.
split = 'model_inputs'
data.plot_sacred_training(
    id_list,
    split,
    return_df=True,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

Unnamed: 0,model_inputs,loss,accuracy,mse,val_loss,val_accuracy,val_mse
443,"[PSn, PCn]","[0.026991229504346848, 0.022944767028093338, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.026991231366991997, 0.02294476516544819, 0....","[0.028154844418168068, 0.027469953522086143, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.028154844418168068, 0.027469953522086143, 0..."
451,"[PSn, PCn, TBn, TSBn]","[0.026387937366962433, 0.021903852000832558, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02638794295489788, 0.02190385013818741, 0.0...","[0.02560662291944027, 0.024573639035224915, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02560662291944027, 0.024573637172579765, 0...."


In [51]:
# Hidden state size of 20: runs 447 and 455.
id_list = [447, 455]
# `x` is assigned for parity with the other cells but is not passed to the call below.
x = 'hidden_state'
# `split` is what goes in as the second positional argument here.
split = 'model_inputs'
data.plot_sacred_training(
    id_list,
    split,
    return_df=True,
    plot_params={
        'title': 'Excluding Rhythmic Information, HSS = 20',
        'ylim': [0.0045, 0.02],
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

Unnamed: 0,model_inputs,loss,accuracy,mse,val_loss,val_accuracy,val_mse
447,"[PSn, PCn]","[0.02430538646876812, 0.020610108971595764, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02430538460612297, 0.020610108971595764, 0....","[0.025881074368953705, 0.019983043894171715, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.025881074368953705, 0.019983042031526566, 0..."
455,"[PSn, PCn, TBn, TSBn]","[0.023474223911762238, 0.018939703702926636, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02347422204911709, 0.018939705565571785, 0....","[0.02234908938407898, 0.019693830981850624, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.02234908938407898, 0.019693832844495773, 0...."


### testing different inputs
415 vs 470 vs 471: three models, each with different inputs.
PSn + PCn is smoothest, trains fastest.

In [24]:
# All six input-representation runs, epoch_lim=60.
id_list = [415, 470, 471, 472, 473, 474]
x = 'model_inputs'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=60,
    plot_params={'title': 'Early Training for Various Inputs'},
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [33]:
# The two main input representations (runs 415 and 471), epoch_lim=700.
id_list = [415, 471]
x = 'model_inputs'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=700,
    plot_params={
        'title': 'Pitch Representations: [Pn] vs [PCn, PSn]',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [34]:
# Same two runs (415 and 471), early stages only: epoch_lim=80.
id_list = [415, 471]
x = 'model_inputs'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=80,
    plot_params={
        'title': 'Pitch Representations: [Pn] vs [PCn, PSn], Early Training',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [156]:
# All six input-representation runs, epoch_lim=100, default plot settings.
id_list = [415, 470, 471, 472, 473, 474]
x = 'model_inputs'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=100,
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [9]:
# Repeat of the six-run comparison with epoch_lim=100.
# NOTE(review): the saved output of this cell is a NameError for 'pymongo'; the
# cause is not visible in this notebook - re-run on a fresh kernel to confirm.
id_list = [415, 470, 471, 472, 473, 474]
x = 'model_inputs'  # hyperparameter of interest
data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=100,
)

NameError: name 'pymongo' is not defined

### Testing different inputs data limited


In [48]:
# Data-limited comparison of inputs, part 1: runs 499, 410 and 513.
id_list = [499, 410, 513]
x = 'model_inputs'  # same key as `split`; only `split` is passed below
split = 'model_inputs'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_training` is not imported by any cell visible in this
# notebook, so it raises NameError after a kernel restart.
data.plot_sacred_training(
    id_list,
    split,
    epoch_lim=1250,
    plot_params={
        'ylim': (0.0045, 0.025),
        'title': 'Data Limited Comparison of Inputs 1',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

In [71]:
# Data-limited comparison of inputs, summary table/plot: runs 499, 410, 513, 512.
id_list = [499, 410, 513, 512]
x = 'model_inputs'  # same key as `split`; only `split` is passed below
split = 'model_inputs'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_runs` is not imported by any cell visible in this notebook,
# so it raises NameError after a kernel restart.
data.plot_sacred_runs(
    id_list,
    split,
    return_df=True,
    plot_params={
        'ylim': (0.0045, 0.025),
        'title': 'Data Limited Comparison of Inputs 1',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,model_inputs,loss,mse,val_loss,val_mse,accuracy,val_accuracy,epochs
410,"[PCn, PSn, TBn, TSBn]",0.005877,0.005877,0.005739,0.005739,0.0,0.0,1500
499,"[Pn, TBn, TSBn]",0.006885,0.006885,0.006634,0.006634,0.0,0.0,2173
512,"[PSn, Pn, TBn, TSBn]",0.005776,0.005776,0.006387,0.006387,0.0,0.0,2661
513,"[PCn, Pn, TBn, TSBn]",0.006813,0.006813,0.00652,0.00652,0.0,0.0,1973


In [49]:
# Data-limited comparison of inputs, part 2: runs 410 and 512.
id_list = [410, 512]
x = 'model_inputs'  # same key as `split`; only `split` is passed below
split = 'model_inputs'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_training` is not imported by any cell visible in this
# notebook, so it raises NameError after a kernel restart.
data.plot_sacred_training(
    id_list,
    split,
    epoch_lim=1250,
    plot_params={
        'ylim': (0.0045, 0.025),
        'title': 'Data Limited Comparison of Inputs 2',
        'ylabel': 'val MSE',
        'figsize': (7, 7),
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

### testing different hidden states, limiting training data
Earlier runs use a much smaller data set - shows that smaller network size is better for less data.

In [63]:
# Hidden-state sweep under data restriction: run ids 478-484 together with 405-416.
id_list = list(range(478, 485)) + list(range(405, 417))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
# Config key the runs are split by.
split = 'nth_example'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_runs` is not imported by any cell visible in this notebook,
# so it raises NameError after a kernel restart.
data.plot_sacred_runs(
    id_list,
    x,
    split,
    return_df=True,
    plot_params={
        'title': 'Effect of Data Restriction on Optimum Hidden State Size',
        'ylabel': 'val MSE',
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,hidden_state,loss,mse,val_loss,val_mse,accuracy,val_accuracy,nth_example,epochs
405,2,0.007591,0.007591,0.006967,0.006967,0.0,0.0,,820
406,3,0.007151,0.007151,0.006712,0.006712,0.0,0.0,,511
407,4,0.006655,0.006655,0.006222,0.006222,0.0,0.0,,1447
408,5,0.006475,0.006475,0.006049,0.006049,0.0,0.0,,797
409,6,0.006311,0.006311,0.005794,0.005794,0.0,0.0,,861
410,7,0.005877,0.005877,0.005739,0.005739,0.0,0.0,,1500
414,12,0.005372,0.005372,0.005534,0.005534,0.0,0.0,,1158
415,24,0.004857,0.004857,0.005303,0.005303,0.0,0.0,,469
416,48,0.003463,0.003463,0.005487,0.005487,0.0,0.0,,358
478,48,0.002493,0.002493,0.007181,0.007181,0.0,0.0,8.0,657


### base key with/without
485 uses base key (I thought that was turned on anyway!). Does worse!

In [66]:
# Training curves with and without the base key: runs 415 and 485.
id_list = [415, 485]
x = 'use_base_key'  # hyperparameter of interest
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_training` is not imported by any cell visible in this
# notebook, so it raises NameError after a kernel restart.
# The result is kept in `df` (return_df=True) rather than displayed by the cell.
df = data.plot_sacred_training(
    id_list,
    x,
    epoch_lim=800,
    return_df=True,
    plot_params={
        'title': 'The Effect of "Use Base Key"',
        'ylim': [0.0045, 0.019],
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='accuracy'), Check…

### testing three layer models


In [69]:
# Final-metric summary with and without the base key: runs 415 and 485.
id_list = [415, 485]
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'use_base_key'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_runs` is not imported by any cell visible in this notebook,
# so it raises NameError after a kernel restart.
data.plot_sacred_runs(
    id_list,
    x,
    return_df=True,
    plot_params={
        'title': 'The Effect of "Use Base Key"',
        'ylim': [0.0045, 0.019],
    },
)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…

Unnamed: 0,use_base_key,loss,mse,val_loss,val_mse,accuracy,val_accuracy,epochs
415,False,0.004857,0.004857,0.005303,0.005303,0.0,0.0,469
485,True,0.004942,0.004942,0.005634,0.005634,0.0,0.0,433


In [118]:
# Hidden-state sweep split by encoder depth: run ids 488-493 together with 405-416.
id_list = list(range(488, 494)) + list(range(405, 417))
# Hyperparameter of interest; it goes on the x axis of the summary plot.
x = 'hidden_state'
# Config key the runs are split by.
split = 'bi_encoder_lstms'
# Call the helper through the `data` module, as the earlier cells do. The bare
# name `plot_sacred_runs` is not imported by any cell visible in this notebook,
# so it raises NameError after a kernel restart.
data.plot_sacred_runs(id_list, x, split)

VBox(children=(Checkbox(value=False, description='loss'), Checkbox(value=False, description='mse'), Checkbox(v…