# Scope of jupyter notebook
1. Data preperation steps
2. Fitting the model
3. Comparing the model performance with subset of data

In [1]:
import keras, os, pickle, ast
# import implicit
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from IPython.display import SVG
from keras.models import model_from_json
from keras.optimizers import Adam
from keras.layers import Dense,Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.callbacks import CSVLogger
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
from keras.callbacks import ReduceLROnPlateau, History
from keras.regularizers import l1,l2
import seaborn as sns
sns.set()


Using TensorFlow backend.


In [2]:
rootPath = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

## Synthetic Data
1. We work with 3000 training examples with this neural network
2. Assuming 10% noise in data, we work with 2700 reliable datasets, 300 noise datsets (random labels)
3. Breaking down the 2700 training examples

#### we break into high, middle and low tier ratings with a few simple rules in place to guide the model to learn
1. high - high happy probability, moderate-low angry/disgusted, handraised 1, sleep 0, headgaze 2
2. mid - moderate emotions probability, handraise random, sleep 0, headgaze 1-2
3. low - low happy probability, moderate-high angry/disgusted, handraised 0, sleep 0/1, headgaze 0,1
4. noise - all are noise
5. engagement - we will take softmax of range 1 to 5, 4-5 being most engaged, 2-3 being mildly engaged, 1 least engaged

In [3]:
df = pd.read_excel('EngagementTest5.xlsx')

#remove first column from importing excel
df = df.drop('Unnamed: 0', axis = 1)

#shuffle dataframe for randomness in splitting
df = df.sample(frac = 1)

In [4]:
df.head(100)

Unnamed: 0,Happy,Angry,Disgusted,HandRaised,Sleep,HeadGaze,Engagement
633,0.220721,0.527092,0.252187,0,0,0,1
188,0.569488,0.090833,0.339680,1,0,2,2
74,0.721591,0.112459,0.165951,0,0,0,1
140,0.619746,0.267334,0.112920,1,0,2,2
932,0.039449,0.119463,0.841088,1,1,0,2
...,...,...,...,...,...,...,...
73,0.727732,0.261432,0.010837,0,0,0,1
733,0.160361,0.090133,0.749506,0,0,1,1
570,0.251263,0.008761,0.739976,0,1,2,1
809,0.112102,0.033060,0.854838,0,0,2,2


In [5]:
#changing values to numpy array
X = df.iloc[:,:6].to_numpy()
Y = df.iloc[:,6:].to_numpy()

print(X.shape)
print(Y.shape)

(1000, 6)
(1000, 1)


In [6]:
#normalizing the data, will help with different ranges of values for different features
min_max_scaler = preprocessing.MinMaxScaler()
# X = min_max_scaler.fit_transform(X)
scaled_df = pd.DataFrame(X)

scaled_df.head()

Unnamed: 0,0,1,2,3,4,5
0,0.220721,0.527092,0.252187,0.0,0.0,0.0
1,0.569488,0.090833,0.33968,1.0,0.0,2.0
2,0.721591,0.112459,0.165951,0.0,0.0,0.0
3,0.619746,0.267334,0.11292,1.0,0.0,2.0
4,0.039449,0.119463,0.841088,1.0,1.0,0.0


In [7]:
#apply one hot encoding to Y values
ohe = OneHotEncoder()
Y = ohe.fit_transform(Y).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [8]:
Y

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       ...,
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.]])

In [9]:
#Split the dataset into x and y
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(800, 6)
(200, 6)
(800, 5)
(200, 5)


In [10]:
#Building the model, to train this we will build a simple 3 layer NN

def create_model():
    n_cols = X_train.shape[1]
    
    #batch normalization layers need to be added before relu activation
    model = keras.Sequential()
    model.add(Dense(50, activity_regularizer = l2(0.00001), input_shape = (n_cols,)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(200))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(200))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
              
    model.add(Dense(200))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(200))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(5, activation = 'softmax'))
    
    return model

In [11]:
#training the model
#test if model has any serious loss/accuracy problems
model = create_model()
model.compile(optimizer = Adam(lr = 0.000005), loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
#adding callbacks
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 50, min_lr = 1e-7, verbose = 1)

history = History()

history = model.fit(X_train, Y_train,
                    validation_split = 0.2,
                    epochs = 1000,
                    batch_size = 64,
                    verbose = 2,
                    callbacks = [reduce_lr, history])


Train on 640 samples, validate on 160 samples
Epoch 1/1000
 - 2s - loss: 2.0504 - categorical_accuracy: 0.2313 - val_loss: 1.6184 - val_categorical_accuracy: 0.1625
Epoch 2/1000
 - 0s - loss: 1.9833 - categorical_accuracy: 0.2672 - val_loss: 1.5878 - val_categorical_accuracy: 0.3313
Epoch 3/1000
 - 0s - loss: 1.9931 - categorical_accuracy: 0.2438 - val_loss: 1.5636 - val_categorical_accuracy: 0.2875
Epoch 4/1000
 - 0s - loss: 2.0277 - categorical_accuracy: 0.2203 - val_loss: 1.5468 - val_categorical_accuracy: 0.3063
Epoch 5/1000
 - 0s - loss: 2.0003 - categorical_accuracy: 0.2359 - val_loss: 1.5291 - val_categorical_accuracy: 0.3000
Epoch 6/1000
 - 0s - loss: 1.9332 - categorical_accuracy: 0.2891 - val_loss: 1.5176 - val_categorical_accuracy: 0.2625
Epoch 7/1000
 - 0s - loss: 2.0112 - categorical_accuracy: 0.2375 - val_loss: 1.5096 - val_categorical_accuracy: 0.2250
Epoch 8/1000
 - 0s - loss: 1.9376 - categorical_accuracy: 0.2594 - val_loss: 1.5007 - val_categorical_accuracy: 0.2313
E

Epoch 68/1000
 - 0s - loss: 1.8479 - categorical_accuracy: 0.2906 - val_loss: 1.3251 - val_categorical_accuracy: 0.4750
Epoch 69/1000
 - 0s - loss: 1.9426 - categorical_accuracy: 0.2766 - val_loss: 1.3247 - val_categorical_accuracy: 0.4875
Epoch 70/1000
 - 0s - loss: 1.8117 - categorical_accuracy: 0.3172 - val_loss: 1.3233 - val_categorical_accuracy: 0.4875
Epoch 71/1000
 - 0s - loss: 1.8874 - categorical_accuracy: 0.2812 - val_loss: 1.3251 - val_categorical_accuracy: 0.4625
Epoch 72/1000
 - 0s - loss: 1.7047 - categorical_accuracy: 0.3641 - val_loss: 1.3268 - val_categorical_accuracy: 0.4688
Epoch 73/1000
 - 0s - loss: 1.7457 - categorical_accuracy: 0.3031 - val_loss: 1.3252 - val_categorical_accuracy: 0.4688
Epoch 74/1000
 - 0s - loss: 1.8534 - categorical_accuracy: 0.2906 - val_loss: 1.3209 - val_categorical_accuracy: 0.4812
Epoch 75/1000
 - 0s - loss: 1.7482 - categorical_accuracy: 0.3250 - val_loss: 1.3198 - val_categorical_accuracy: 0.4625
Epoch 76/1000
 - 0s - loss: 1.8653 - cat

Epoch 136/1000
 - 0s - loss: 1.7381 - categorical_accuracy: 0.3453 - val_loss: 1.2317 - val_categorical_accuracy: 0.6313
Epoch 137/1000
 - 0s - loss: 1.7298 - categorical_accuracy: 0.3422 - val_loss: 1.2332 - val_categorical_accuracy: 0.6375
Epoch 138/1000
 - 0s - loss: 1.6739 - categorical_accuracy: 0.3516 - val_loss: 1.2295 - val_categorical_accuracy: 0.6500
Epoch 139/1000
 - 0s - loss: 1.6765 - categorical_accuracy: 0.3625 - val_loss: 1.2286 - val_categorical_accuracy: 0.6500
Epoch 140/1000
 - 0s - loss: 1.7605 - categorical_accuracy: 0.3438 - val_loss: 1.2252 - val_categorical_accuracy: 0.6500
Epoch 141/1000
 - 0s - loss: 1.7215 - categorical_accuracy: 0.3484 - val_loss: 1.2246 - val_categorical_accuracy: 0.6500
Epoch 142/1000
 - 0s - loss: 1.6870 - categorical_accuracy: 0.3734 - val_loss: 1.2249 - val_categorical_accuracy: 0.6500
Epoch 143/1000
 - 0s - loss: 1.7447 - categorical_accuracy: 0.3641 - val_loss: 1.2250 - val_categorical_accuracy: 0.6500
Epoch 144/1000
 - 0s - loss: 1.6

Epoch 204/1000
 - 0s - loss: 1.5935 - categorical_accuracy: 0.3953 - val_loss: 1.1614 - val_categorical_accuracy: 0.6438
Epoch 205/1000
 - 0s - loss: 1.6594 - categorical_accuracy: 0.3797 - val_loss: 1.1612 - val_categorical_accuracy: 0.6438
Epoch 206/1000
 - 0s - loss: 1.6208 - categorical_accuracy: 0.3859 - val_loss: 1.1609 - val_categorical_accuracy: 0.6438
Epoch 207/1000
 - 0s - loss: 1.5907 - categorical_accuracy: 0.3984 - val_loss: 1.1601 - val_categorical_accuracy: 0.6438
Epoch 208/1000
 - 0s - loss: 1.5612 - categorical_accuracy: 0.4078 - val_loss: 1.1611 - val_categorical_accuracy: 0.6438
Epoch 209/1000
 - 0s - loss: 1.6790 - categorical_accuracy: 0.3906 - val_loss: 1.1604 - val_categorical_accuracy: 0.6438
Epoch 210/1000
 - 0s - loss: 1.5470 - categorical_accuracy: 0.4156 - val_loss: 1.1596 - val_categorical_accuracy: 0.6438
Epoch 211/1000
 - 0s - loss: 1.5375 - categorical_accuracy: 0.3734 - val_loss: 1.1562 - val_categorical_accuracy: 0.6438
Epoch 212/1000
 - 0s - loss: 1.5

Epoch 272/1000
 - 0s - loss: 1.5502 - categorical_accuracy: 0.4453 - val_loss: 1.1091 - val_categorical_accuracy: 0.6562
Epoch 273/1000
 - 0s - loss: 1.5522 - categorical_accuracy: 0.4266 - val_loss: 1.1083 - val_categorical_accuracy: 0.6500
Epoch 274/1000
 - 0s - loss: 1.5143 - categorical_accuracy: 0.4281 - val_loss: 1.1074 - val_categorical_accuracy: 0.6500
Epoch 275/1000
 - 0s - loss: 1.5294 - categorical_accuracy: 0.4219 - val_loss: 1.1059 - val_categorical_accuracy: 0.6562
Epoch 276/1000
 - 0s - loss: 1.5380 - categorical_accuracy: 0.4328 - val_loss: 1.1045 - val_categorical_accuracy: 0.6500
Epoch 277/1000
 - 0s - loss: 1.5037 - categorical_accuracy: 0.4328 - val_loss: 1.1044 - val_categorical_accuracy: 0.6562
Epoch 278/1000
 - 0s - loss: 1.5271 - categorical_accuracy: 0.4391 - val_loss: 1.1050 - val_categorical_accuracy: 0.6500
Epoch 279/1000
 - 0s - loss: 1.5825 - categorical_accuracy: 0.4094 - val_loss: 1.1039 - val_categorical_accuracy: 0.6500
Epoch 280/1000
 - 0s - loss: 1.6

Epoch 340/1000
 - 0s - loss: 1.5095 - categorical_accuracy: 0.4484 - val_loss: 1.0686 - val_categorical_accuracy: 0.6500
Epoch 341/1000
 - 0s - loss: 1.4916 - categorical_accuracy: 0.4516 - val_loss: 1.0681 - val_categorical_accuracy: 0.6500
Epoch 342/1000
 - 0s - loss: 1.4222 - categorical_accuracy: 0.4500 - val_loss: 1.0673 - val_categorical_accuracy: 0.6500
Epoch 343/1000
 - 0s - loss: 1.4236 - categorical_accuracy: 0.4625 - val_loss: 1.0671 - val_categorical_accuracy: 0.6500
Epoch 344/1000
 - 0s - loss: 1.4402 - categorical_accuracy: 0.4578 - val_loss: 1.0662 - val_categorical_accuracy: 0.6500
Epoch 345/1000
 - 0s - loss: 1.4375 - categorical_accuracy: 0.4797 - val_loss: 1.0661 - val_categorical_accuracy: 0.6500
Epoch 346/1000
 - 0s - loss: 1.5143 - categorical_accuracy: 0.4484 - val_loss: 1.0653 - val_categorical_accuracy: 0.6500
Epoch 347/1000
 - 0s - loss: 1.4772 - categorical_accuracy: 0.4500 - val_loss: 1.0642 - val_categorical_accuracy: 0.6500
Epoch 348/1000
 - 0s - loss: 1.5

Epoch 408/1000
 - 0s - loss: 1.4533 - categorical_accuracy: 0.4594 - val_loss: 1.0355 - val_categorical_accuracy: 0.6562
Epoch 409/1000
 - 0s - loss: 1.5229 - categorical_accuracy: 0.4656 - val_loss: 1.0363 - val_categorical_accuracy: 0.6500
Epoch 410/1000
 - 0s - loss: 1.4696 - categorical_accuracy: 0.4531 - val_loss: 1.0358 - val_categorical_accuracy: 0.6562
Epoch 411/1000
 - 0s - loss: 1.3905 - categorical_accuracy: 0.4750 - val_loss: 1.0359 - val_categorical_accuracy: 0.6562
Epoch 412/1000
 - 0s - loss: 1.4586 - categorical_accuracy: 0.4656 - val_loss: 1.0367 - val_categorical_accuracy: 0.6562
Epoch 413/1000
 - 0s - loss: 1.4839 - categorical_accuracy: 0.4359 - val_loss: 1.0369 - val_categorical_accuracy: 0.6562
Epoch 414/1000
 - 0s - loss: 1.4327 - categorical_accuracy: 0.4812 - val_loss: 1.0369 - val_categorical_accuracy: 0.6500
Epoch 415/1000
 - 0s - loss: 1.4895 - categorical_accuracy: 0.4328 - val_loss: 1.0366 - val_categorical_accuracy: 0.6562
Epoch 416/1000
 - 0s - loss: 1.3

Epoch 476/1000
 - 0s - loss: 1.4122 - categorical_accuracy: 0.4859 - val_loss: 1.0168 - val_categorical_accuracy: 0.6375
Epoch 477/1000
 - 0s - loss: 1.4653 - categorical_accuracy: 0.4641 - val_loss: 1.0167 - val_categorical_accuracy: 0.6375
Epoch 478/1000
 - 0s - loss: 1.4364 - categorical_accuracy: 0.4797 - val_loss: 1.0157 - val_categorical_accuracy: 0.6438
Epoch 479/1000
 - 0s - loss: 1.4397 - categorical_accuracy: 0.4969 - val_loss: 1.0149 - val_categorical_accuracy: 0.6375
Epoch 480/1000
 - 0s - loss: 1.4547 - categorical_accuracy: 0.4812 - val_loss: 1.0151 - val_categorical_accuracy: 0.6438
Epoch 481/1000
 - 0s - loss: 1.4069 - categorical_accuracy: 0.4453 - val_loss: 1.0132 - val_categorical_accuracy: 0.6438
Epoch 482/1000
 - 0s - loss: 1.3799 - categorical_accuracy: 0.5063 - val_loss: 1.0121 - val_categorical_accuracy: 0.6438
Epoch 483/1000
 - 0s - loss: 1.3631 - categorical_accuracy: 0.5047 - val_loss: 1.0113 - val_categorical_accuracy: 0.6438
Epoch 484/1000
 - 0s - loss: 1.3

Epoch 544/1000
 - 0s - loss: 1.4599 - categorical_accuracy: 0.4797 - val_loss: 0.9952 - val_categorical_accuracy: 0.6500
Epoch 545/1000
 - 0s - loss: 1.3229 - categorical_accuracy: 0.4953 - val_loss: 0.9953 - val_categorical_accuracy: 0.6500
Epoch 546/1000
 - 0s - loss: 1.4151 - categorical_accuracy: 0.4750 - val_loss: 0.9949 - val_categorical_accuracy: 0.6438
Epoch 547/1000
 - 0s - loss: 1.4093 - categorical_accuracy: 0.4984 - val_loss: 0.9948 - val_categorical_accuracy: 0.6500
Epoch 548/1000
 - 0s - loss: 1.4110 - categorical_accuracy: 0.4938 - val_loss: 0.9948 - val_categorical_accuracy: 0.6500
Epoch 549/1000
 - 0s - loss: 1.4333 - categorical_accuracy: 0.4969 - val_loss: 0.9946 - val_categorical_accuracy: 0.6500
Epoch 550/1000
 - 0s - loss: 1.3816 - categorical_accuracy: 0.4828 - val_loss: 0.9939 - val_categorical_accuracy: 0.6438
Epoch 551/1000
 - 0s - loss: 1.3962 - categorical_accuracy: 0.4953 - val_loss: 0.9938 - val_categorical_accuracy: 0.6500
Epoch 552/1000
 - 0s - loss: 1.4

Epoch 612/1000
 - 0s - loss: 1.3408 - categorical_accuracy: 0.5172 - val_loss: 0.9831 - val_categorical_accuracy: 0.6562
Epoch 613/1000
 - 0s - loss: 1.4181 - categorical_accuracy: 0.5078 - val_loss: 0.9837 - val_categorical_accuracy: 0.6500
Epoch 614/1000
 - 0s - loss: 1.3371 - categorical_accuracy: 0.5109 - val_loss: 0.9831 - val_categorical_accuracy: 0.6500
Epoch 615/1000
 - 0s - loss: 1.3491 - categorical_accuracy: 0.5125 - val_loss: 0.9823 - val_categorical_accuracy: 0.6500
Epoch 616/1000
 - 0s - loss: 1.3684 - categorical_accuracy: 0.4812 - val_loss: 0.9812 - val_categorical_accuracy: 0.6500
Epoch 617/1000
 - 0s - loss: 1.3578 - categorical_accuracy: 0.5203 - val_loss: 0.9801 - val_categorical_accuracy: 0.6500
Epoch 618/1000
 - 0s - loss: 1.3768 - categorical_accuracy: 0.4953 - val_loss: 0.9806 - val_categorical_accuracy: 0.6500
Epoch 619/1000
 - 0s - loss: 1.3794 - categorical_accuracy: 0.5000 - val_loss: 0.9804 - val_categorical_accuracy: 0.6500
Epoch 620/1000
 - 0s - loss: 1.3

Epoch 680/1000
 - 0s - loss: 1.2485 - categorical_accuracy: 0.5297 - val_loss: 0.9733 - val_categorical_accuracy: 0.6375
Epoch 681/1000
 - 0s - loss: 1.3701 - categorical_accuracy: 0.5078 - val_loss: 0.9729 - val_categorical_accuracy: 0.6375
Epoch 682/1000
 - 0s - loss: 1.2949 - categorical_accuracy: 0.5234 - val_loss: 0.9732 - val_categorical_accuracy: 0.6375
Epoch 683/1000
 - 0s - loss: 1.3294 - categorical_accuracy: 0.5234 - val_loss: 0.9725 - val_categorical_accuracy: 0.6375
Epoch 684/1000
 - 0s - loss: 1.3751 - categorical_accuracy: 0.4922 - val_loss: 0.9719 - val_categorical_accuracy: 0.6375
Epoch 685/1000
 - 0s - loss: 1.3020 - categorical_accuracy: 0.5344 - val_loss: 0.9721 - val_categorical_accuracy: 0.6375
Epoch 686/1000
 - 0s - loss: 1.3472 - categorical_accuracy: 0.4969 - val_loss: 0.9717 - val_categorical_accuracy: 0.6375
Epoch 687/1000
 - 0s - loss: 1.3631 - categorical_accuracy: 0.5047 - val_loss: 0.9701 - val_categorical_accuracy: 0.6375
Epoch 688/1000
 - 0s - loss: 1.3

Epoch 748/1000
 - 0s - loss: 1.3700 - categorical_accuracy: 0.5000 - val_loss: 0.9625 - val_categorical_accuracy: 0.6375
Epoch 749/1000
 - 0s - loss: 1.3363 - categorical_accuracy: 0.5125 - val_loss: 0.9628 - val_categorical_accuracy: 0.6375
Epoch 750/1000
 - 0s - loss: 1.3948 - categorical_accuracy: 0.5016 - val_loss: 0.9634 - val_categorical_accuracy: 0.6375
Epoch 751/1000
 - 0s - loss: 1.3173 - categorical_accuracy: 0.5266 - val_loss: 0.9629 - val_categorical_accuracy: 0.6375
Epoch 752/1000
 - 0s - loss: 1.3135 - categorical_accuracy: 0.5000 - val_loss: 0.9613 - val_categorical_accuracy: 0.6375
Epoch 753/1000
 - 0s - loss: 1.2623 - categorical_accuracy: 0.5281 - val_loss: 0.9612 - val_categorical_accuracy: 0.6375
Epoch 754/1000
 - 0s - loss: 1.4125 - categorical_accuracy: 0.5125 - val_loss: 0.9613 - val_categorical_accuracy: 0.6375
Epoch 755/1000
 - 0s - loss: 1.3509 - categorical_accuracy: 0.5172 - val_loss: 0.9610 - val_categorical_accuracy: 0.6438
Epoch 756/1000
 - 0s - loss: 1.2

Epoch 816/1000
 - 0s - loss: 1.2952 - categorical_accuracy: 0.5344 - val_loss: 0.9566 - val_categorical_accuracy: 0.6375
Epoch 817/1000
 - 0s - loss: 1.2934 - categorical_accuracy: 0.5266 - val_loss: 0.9562 - val_categorical_accuracy: 0.6375
Epoch 818/1000
 - 0s - loss: 1.3111 - categorical_accuracy: 0.5281 - val_loss: 0.9564 - val_categorical_accuracy: 0.6375
Epoch 819/1000
 - 0s - loss: 1.2961 - categorical_accuracy: 0.5188 - val_loss: 0.9557 - val_categorical_accuracy: 0.6375
Epoch 820/1000
 - 0s - loss: 1.3105 - categorical_accuracy: 0.5297 - val_loss: 0.9555 - val_categorical_accuracy: 0.6375
Epoch 821/1000
 - 0s - loss: 1.2659 - categorical_accuracy: 0.5406 - val_loss: 0.9554 - val_categorical_accuracy: 0.6438
Epoch 822/1000
 - 0s - loss: 1.2823 - categorical_accuracy: 0.5031 - val_loss: 0.9556 - val_categorical_accuracy: 0.6438
Epoch 823/1000
 - 0s - loss: 1.2830 - categorical_accuracy: 0.5375 - val_loss: 0.9565 - val_categorical_accuracy: 0.6375
Epoch 824/1000
 - 0s - loss: 1.3

Epoch 884/1000
 - 0s - loss: 1.3117 - categorical_accuracy: 0.5266 - val_loss: 0.9512 - val_categorical_accuracy: 0.6500
Epoch 885/1000
 - 0s - loss: 1.3063 - categorical_accuracy: 0.5437 - val_loss: 0.9513 - val_categorical_accuracy: 0.6562
Epoch 886/1000
 - 0s - loss: 1.2744 - categorical_accuracy: 0.5188 - val_loss: 0.9519 - val_categorical_accuracy: 0.6500
Epoch 887/1000
 - 0s - loss: 1.2349 - categorical_accuracy: 0.5453 - val_loss: 0.9518 - val_categorical_accuracy: 0.6562
Epoch 888/1000
 - 0s - loss: 1.2977 - categorical_accuracy: 0.5203 - val_loss: 0.9511 - val_categorical_accuracy: 0.6562
Epoch 889/1000
 - 0s - loss: 1.3418 - categorical_accuracy: 0.4969 - val_loss: 0.9517 - val_categorical_accuracy: 0.6562
Epoch 890/1000
 - 0s - loss: 1.3236 - categorical_accuracy: 0.5141 - val_loss: 0.9517 - val_categorical_accuracy: 0.6562
Epoch 891/1000
 - 0s - loss: 1.2781 - categorical_accuracy: 0.5312 - val_loss: 0.9520 - val_categorical_accuracy: 0.6562
Epoch 892/1000
 - 0s - loss: 1.3

Epoch 952/1000
 - 0s - loss: 1.3167 - categorical_accuracy: 0.5172 - val_loss: 0.9455 - val_categorical_accuracy: 0.6625
Epoch 953/1000
 - 0s - loss: 1.1930 - categorical_accuracy: 0.5594 - val_loss: 0.9456 - val_categorical_accuracy: 0.6625
Epoch 954/1000
 - 0s - loss: 1.2605 - categorical_accuracy: 0.5297 - val_loss: 0.9458 - val_categorical_accuracy: 0.6625
Epoch 955/1000
 - 0s - loss: 1.2838 - categorical_accuracy: 0.5250 - val_loss: 0.9457 - val_categorical_accuracy: 0.6625
Epoch 956/1000
 - 0s - loss: 1.2697 - categorical_accuracy: 0.5578 - val_loss: 0.9463 - val_categorical_accuracy: 0.6687
Epoch 957/1000
 - 0s - loss: 1.3048 - categorical_accuracy: 0.5328 - val_loss: 0.9459 - val_categorical_accuracy: 0.6625
Epoch 958/1000
 - 0s - loss: 1.2965 - categorical_accuracy: 0.5437 - val_loss: 0.9455 - val_categorical_accuracy: 0.6625
Epoch 959/1000
 - 0s - loss: 1.3075 - categorical_accuracy: 0.5359 - val_loss: 0.9461 - val_categorical_accuracy: 0.6625
Epoch 960/1000
 - 0s - loss: 1.2

In [12]:
scores = model.evaluate(X_train, Y_train, batch_size=30)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

categorical_accuracy: 66.50%


In [None]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [None]:
X_test[:1].shape

In [None]:
print(X_test[:1])
predictions = model.predict_classes(X_test[:1], verbose = 1) + 1
print()
print(predictions)

In [None]:
print(X_test[:1])
predictions = model.predict_classes(X_test[:1], verbose = 1) + 1
print()
print(predictions)

In [None]:
import time
i = 0

def hello(model):
    global i
    print("read csv, Interval {}".format(i))
    predictions = model.predict(X_test[:1], verbose = 1)
    print(predictions)
    i = i + 1
    
nexttime = time.time()
while True:
    hello(model)          # take t sec
    nexttime += 5
    sleeptime = nexttime - time.time()
    if sleeptime > 0:
        time.sleep(sleeptime)

### Comparing the effects of training the model on various subsamples of the data

In [None]:
result_dict = {}
subset_data_list = [0.2, 0.4, 0.6, 0.8]

for subset_data in subset_data_list:
    #training the model on a proportion of data and save the loss and accuracy
    
    model = create_model()
    model.compile(optimizer = Adam(lr = 0.000005), loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
    
    #adding callbacks
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor=0.5,
                                  patience=50, min_lr=1e-7, verbose = 1)
    history = History()
    logger = CSVLogger(os.path.join(rootPath, 'logs', 'log_results-{}.csv'.format(subset_data)))
    
    #training the model with subset of data
    history = model.fit(X_train, Y_train,
                        validation_split = 1 - subset_data,
                        epochs = 1000,
                        batch_size = 64,
                        verbose = 2,
                        callbacks = [reduce_lr, history, logger])
    
    result_dict[subset_data] = {
        'training_loss' : history.history['loss'],
        'validation_loss': history.history['val_loss'],
        'training_accuracy': history.history['categorical_accuracy'],
        'validation_accuracy': history.history['val_categorical_accuracy']
    }

In [None]:
def plotGraph(result_dict, plot_var):
    #this plots a graph of training loss vs epochs
    for subset_data in result_dict.keys():
        ax = sns.lineplot(x = np.arange(1000), 
                          y = result_dict[subset_data][plot_var], label =subset_data)
        ax.set(xlabel = 'training epoch', ylabel = plot_var)
        
    if plot_var == "training_loss":
        #rescale for easier comparison
        ax.set_ylim(1, 2.5)

        
    ax.set_title('{} with epoch'.format(plot_var))
    ax.legend()
    
    return ax

In [None]:
plotGraph(result_dict, 'training_loss')

In [None]:
plotGraph(result_dict,'validation_loss')

In [None]:
plotGraph(result_dict,'training_accuracy')

In [None]:
plotGraph(result_dict,'validation_accuracy')

In [None]:
predictions = model.predict(X_test[:5], verbose = 1)
# predictions = model.predict_classes(X_test[:50], verbose=1)

In [None]:
print(X_test[:5])

In [None]:
print(predictions)