In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
# import libraries
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential 
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
# Helper libraries
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from google.colab import files
import io
from sklearn.utils import shuffle

# load mostly preprocessed data (preprocessing accomplished before 3/25 proposal)

# would use regular numpy syntax to load locally:
#X = np.load('<path>/X_cleaned_attempt_new.npy',allow_pickle=True)
#Y = np.load('<path>/Y_cleaned_attempt_new.npy',allow_pickle=True)
#X = np.asarray(X).astype('float32')
#Y = np.asarray(Y).astype('float32')

# i'm using colab, so it's slightly more complicated
ytrainval_byteseq = files.upload() # choose 'Y_cleaned_attempt_new.npy' from local system
ytrainval_filelike = io.BytesIO(ytrainval_byteseq['Y_cleaned_attempt_new.npy']) # create file-like object
ytrainval = np.load(ytrainval_filelike,allow_pickle=True) # create regular numpy array
xtrainval_byteseq = files.upload() # choose 'X_cleaned_attempt_new.npy' from local system
xtrainval_filelike = io.BytesIO(xtrainval_byteseq['X_cleaned_attempt_new.npy']) # create file-like object
xtrainval = np.load(xtrainval_filelike,allow_pickle=True) # create regular numpy array
ytest_byteseq = files.upload() # choose 'Y_test.npy' from local system
ytest_filelike = io.BytesIO(ytest_byteseq['Y_test.npy']) # create file-like object
ytest = np.load(ytest_filelike,allow_pickle=True) # create regular numpy array
xtest_byteseq = files.upload() # choose 'X_test.npy' from local system
xtest_filelike = io.BytesIO(xtest_byteseq['X_test.npy']) # create file-like object
xtest = np.load(xtest_filelike,allow_pickle=True) # create regular numpy array

#check shapes
print(xtrainval.shape)
print(ytrainval.shape)
print(xtest.shape)
print(ytest.shape)

#ytrainval needs to be converted to number representation of labels
feature_map = {'Rock':0,'Electronic':1,'Experimental':2,'Hip-Hop':3,'Folk':4,'Instrumental':5,'Pop':6,
              'International':7,'Classical':8,'Old-Time / Historic':9, 'Jazz':10,'Country':11,'Soul-RnB':12,
              'Spoken':13,'Blues':14,'Easy Listening':15}
for i in range(ytrainval.shape[0]):
    ytrainval[i] = float(feature_map[ytrainval[i]])
ytrainval[i] = np.asarray(ytrainval[i]).astype('float32')

print(ytrainval)
print(ytest)

# for the preliminary report, i did a roughly 80-20 split, but used the 20% as the training set to improve speed
# so will recombine and do a 90-10 split, using the 90% as training/validation
xfull = np.concatenate((xtrainval,xtest),axis=0)
yfull = np.concatenate((ytrainval,ytest),axis=0)
print(xfull.shape)
print(yfull.shape)



Saving Y_cleaned_attempt_new.npy to Y_cleaned_attempt_new.npy


Saving X_cleaned_attempt_new.npy to X_cleaned_attempt_new.npy


Saving Y_test.npy to Y_test.npy


Saving X_test.npy to X_test.npy
(9349, 520)
(9349,)
(40249, 520)
(40249,)
[3.0 3.0 3.0 ... 4.0 0.0 array(0., dtype=float32)]
[3.0 6.0 0.0 ... 0.0 0.0 array(0., dtype=float32)]
(49598, 520)
(49598,)


In [2]:
# this is where the strategy changes. We need to do a different train/test/val split
# let's see what the overall distribution between the categories is
yfull_1hot = to_categorical(yfull)
yfull_sum = np.sum(yfull_1hot, axis=0)
print(yfull_sum)


[14182.  9372. 10608.  3552.  2803.  2079.  2332.  1389.  1230.   554.
   571.   194.   175.   423.   110.    24.]


In [4]:
# In an extreme case, we might just take 20 of each class in the train set

yfull = yfull.reshape(yfull.shape[0],1)
both_full = np.concatenate((xfull, yfull), axis=1)
print(both_full.shape)
both_full = shuffle(both_full)
indices = [] 
num_counter = []
for i in range(16):
    num_counter.append(0)
for i in range(both_full.shape[0]):
    if num_counter[int(both_full[i,-1])] < 20:
        num_counter[int(both_full[i,-1])] += 1
        indices.append(i)
print(num_counter)
both_train_small = both_full[indices]
y_train_small = both_train_small[:,-1]
y_train_small_1hot = to_categorical(y_train_small).astype('float32')
x_train_small = both_train_small[:,:-1]
print(y_train_small_1hot.shape)
print(y_train_small.shape)
print(x_train_small.shape)

(49598, 521)
[20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]
(320, 16)
(320,)
(320, 520)


In [None]:
# need to implement k-fold cross validation
'''
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.3),
          layers.Dense(100,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(256,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(128,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(64,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(32,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
# COMPILE NEW MODEL
model.compile(loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(
        learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
        metrics=['accuracy'])


# see https://medium.com/the-owl/k-fold-cross-validation-in-keras-3ec4a3a00538
from sklearn.model_selection import KFold, StratifiedKFold

kf = KFold(n_splits = x_trainval_small.shape[0])
for train_index, val_index in kf.split(np.zeros(x_trainval_small.shape[0]),y_trainval_small_1hot):
	
	
	
	
	# FIT THE MODEL
	history = model.fit(x_trainval_small,
			    epochs=10,,
			    validation_data=valid_data_generator)
	#PLOT HISTORY
	#		:
	#		:
	
	# LOAD BEST MODEL to evaluate the performance of the model
	model.load_weights("/saved_models/model_"+str(fold_var)+".h5")
	
	results = model.evaluate(valid_data_generator)
	results = dict(zip(model.metrics_names,results))
	
	VALIDATION_ACCURACY.append(results['accuracy'])
	VALIDATION_LOSS.append(results['loss'])
	
	tf.keras.backend.clear_session()
	
	fold_var += 1
  '''

In [31]:
'''
# let's give the datasets better names to reflect what i'm actually doing (kind of abandoned above plan)
import copy 
x_trainval_small = x_trainval_small.astype('float32')
y_trainval_small_1hot = y_trainval_small_1hot.astype('float32')
x_train_small = copy.deepcopy(x_trainval_small)
y_train_small_1hot = copy.deepcopy(y_trainval_small_1hot)
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.3),
          layers.Dense(100,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(256,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(128,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(64,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(32,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_trainval_small, y_trainval_small_1hot, batch_size=32, epochs=100, validation_data=(x_trainval_small, y_trainval_small_1hot))
'''

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [36]:
'''
y_test = np.array([both_full[i,-1] for i in range(2000) if i not in indices])
x_test = np.array([both_full[i,:-1] for i in range(2000) if i not in indices])
y_test_1hot = to_categorical(y_test)
ytrain_sum = np.sum(y_train_small_1hot, axis=0)
print(y_test_1hot.shape)
print(x_test.shape)
ytest_sum = np.sum(y_test_1hot, axis=0)
print(ytest_sum)
print(yfull_sum)
print(ytrain_sum)
'''

(1747, 11)
(1747, 520)
[553. 382. 386. 119.  85.  67.  79.  24.  34.  12.   6.]
[14182.  9372. 10608.  3552.  2803.  2079.  2332.  1389.  1230.   554.
   571.   194.   175.   423.   110.    24.]
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.]


In [10]:
val_numbers = [(i-20)//2 for i in yfull_sum] # number of each instance to put in val set 
val_indices = []
num_counter = []
for i in range(16):
    num_counter.append(0)
for i in range(both_full.shape[0]):
    if i not in indices: # don't count if already in training set
        if num_counter[int(both_full[i,-1])] < val_numbers[int(both_full[i,-1])]:
            num_counter[int(both_full[i,-1])] += 1
            val_indices.append(i)

#indices and val_indices should be different lists
for i in indices:
  for j in val_indices:
    assert i!=j

both_val_small = both_full[val_indices,:]
y_val_small = both_val_small[:,-1]
y_val_small_1hot = to_categorical(y_val_small).astype('float32')
x_val_small = both_val_small[:,:-1]
print(y_val_small_1hot.shape)
print(y_val_small.shape)
print(x_val_small.shape)

(24636, 16)
(24636,)
(24636, 520)


In [9]:
print(num_counter)

[7081, 4676, 5294, 1766, 1391, 1029, 1156, 684, 605, 267, 275, 87, 77, 201, 45, 2]


In [11]:
# now we need to put everything else in the test set
test_indices = []
for i in range(both_full.shape[0]):
    if i not in indices: # don't count if already in training set
        if i not in val_indices: # don't count if already in validation set
            test_indices.append(i)
both_test_small = both_full[test_indices,:]
y_test_small = both_test_small[:,-1]
y_test_small_1hot = to_categorical(y_test_small).astype('float32')
x_test_small = both_test_small[:,:-1]
print(y_test_small_1hot.shape)
print(y_test_small.shape)
print(x_test_small.shape)
assert y_test_small.shape[0] + y_val_small.shape[0] + y_train_small.shape[0] == both_full.shape[0]

(24642, 16)
(24642,)
(24642, 520)


In [16]:
from copy import deepcopy
#y_train_small_1hot_copy = deepcopy(y_train_small_1hot) float32 copy
#y_val_small_1hot_copy = deepcopy(y_val_small_1hot) float32 copy
#y_test_small_1hot_copy = deepcopy(y_test_small_1hot) float32 copy
x_train_small_copy = deepcopy(x_train_small) # float copy
x_val_small_copy = deepcopy(x_val_small) # float copy
x_test_small_copy = deepcopy(x_test_small) # float copy

x_train_small = x_train_small.astype('float32')
x_val_small = x_val_small.astype('float32')
x_test_small = x_test_small.astype('float32')

y_train_small_1hot = y_train_small_1hot.astype('int')
y_val_small_1hot = y_val_small_1hot.astype('int')
y_test_small_1hot = y_test_small_1hot.astype('int')

In [17]:
# now let's train!
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.3),
          layers.Dense(100,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(256,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(128,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(64,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(32,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=10, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# clearly we need some strong regularization with this small training set
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(64,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(32,kernel_initializer='lecun_normal'),
          #layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=10, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# clearly we need some strong regularization with this small training set
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(32,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=200, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [21]:
# clearly we need some strong regularization with this small training set
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=200, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [22]:
# the previous model is starting to look like a real neural network, but one that's overfitting
# maybe more complexity could also help it get toward 100% also, but overfitting is the main concern
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(8,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [23]:
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(8,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [25]:
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1',bias_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          #layers.Dense(8,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          #layers.BatchNormalization(),
          #layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [26]:
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1',bias_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(4,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [27]:
# val loss and train loss were pretty similar, can we add just a little more regularized complexity?
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1',bias_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(4,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.25),
          layers.Dense(16,kernel_initializer='lecun_normal'), 
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [28]:
# val loss and train loss were pretty similar, can we add just a little more regularized complexity?
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1',bias_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(8,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.25),
          layers.Dense(16,kernel_initializer='lecun_normal'), 
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
'''
# val loss and train loss were pretty similar, can we add just a little more regularized complexity?
# maybe dropout makes a difference
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(520,kernel_initializer='lecun_normal',kernel_regularizer='l1',bias_regularizer='l1'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(4,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,kernel_initializer='lecun_normal'), 
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=100, validation_data=(x_val_small, y_val_small_1hot))
'''

In [29]:
# further modifying the previous one that had the lowest validation loss
# clearly we need some strong regularization with this small training set
layer_list = [ #520 parameters in input
          layers.BatchNormalization(),
          layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

In [31]:
# further modifying the previous one that had the lowest validation loss
# clearly we need some strong regularization with this small training set
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

KeyboardInterrupt: ignored

In [33]:
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(100,kernel_initializer='lecun_normal'),
          
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
 1/10 [==>...........................] - ETA: 0s - loss: 1.6385 - accuracy: 0.3438

KeyboardInterrupt: ignored

In [34]:
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(100,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
 1/10 [==>...........................] - ETA: 0s - loss: 1.6162 - accuracy: 0.3750

KeyboardInterrupt: ignored

In [35]:
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.6),
          layers.Dense(400,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
 1/10 [==>...........

KeyboardInterrupt: ignored

In [36]:
# so I created a model that isn't learning the training set! let's add another layer
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(10,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.6),
          layers.Dense(400,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(100,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

KeyboardInterrupt: ignored

In [37]:
# that didn't help
# might need to lighten up on the dimensionality reduction
# so I created a model that isn't learning the training set! let's add another layer
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(40,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.6),
          layers.Dense(400,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

KeyboardInterrupt: ignored

In [38]:
# that didn't help
# might need to lighten up on the dimensionality reduction
# so I created a model that isn't learning the training set! let's add another layer
layer_list = [ #520 parameters in input
          layers.Dropout(0.6),
          layers.BatchNormalization(),
          #layers.Dropout(0.4),
          layers.Dense(40,kernel_initializer='lecun_normal'), #let's try dimensionality reduction
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(40,kernel_initializer='lecun_normal',kernel_regularizer='l2'), 
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dropout(0.6),
          layers.Dense(400,kernel_initializer='lecun_normal'),
          layers.BatchNormalization(),
          layers.Activation('selu'),
          layers.Dense(16,activation='softmax',kernel_initializer='lecun_normal'),
          ]
model = Sequential(layer_list)
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001,decay_steps=2000,decay_rate=0.99)),
    loss="categorical_crossentropy", metrics=['accuracy'],)
history = model.fit(x_train_small, y_train_small_1hot, batch_size=32, epochs=400, validation_data=(x_val_small, y_val_small_1hot))

Epoch 1/400

KeyboardInterrupt: ignored

Exception ignored in: 'zmq.backend.cython.message.Frame.__dealloc__'
Traceback (most recent call last):
  File "zmq/backend/cython/checkrc.pxd", line 13, in zmq.backend.cython.checkrc._check_rc
KeyboardInterrupt


Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 7


KeyboardInterrupt

