# Scope of jupyter notebook
1. Data preparation steps
2. Fitting the model
3. Comparing the model performance with subset of data

In [1]:
import keras, os, pickle, ast
# import implicit
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from IPython.display import SVG
from keras.models import model_from_json
from keras.optimizers import Adam
from keras.layers import Dense,Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.callbacks import CSVLogger
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
from keras.callbacks import ReduceLROnPlateau, History
from keras.regularizers import l1,l2
import seaborn as sns
sns.set()


Using TensorFlow backend.


In [2]:
# Absolute path of the parent of the current working directory; the CSV
# training logs written later in this notebook are placed under <rootPath>/logs.
rootPath = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

## Synthetic Data
1. We work with 3000 training examples with this neural network
2. Assuming 10% noise in the data, we work with 2700 reliable examples and 300 noise examples (random labels)
3. Breaking down the 2700 training examples

#### we break into high, middle and low tier ratings with a few simple rules in place to guide the model to learn
1. high - high happy probability, moderate-low angry/disgusted, handraised 1, sleep 0, headgaze 2
2. mid - moderate emotions probability, handraise random, sleep 0, headgaze 1-2
3. low - low happy probability, moderate-high angry/disgusted, handraised 0, sleep 0/1, headgaze 0,1
4. noise - all are noise
5. engagement - we will take softmax of range 1 to 5, 4-5 being most engaged, 2-3 being mildly engaged, 1 least engaged

In [3]:
# Load the engagement dataset (path is relative to the notebook's working directory)
df = pd.read_excel('EngagementTest5.xlsx')

#remove first column from importing excel
df = df.drop('Unnamed: 0', axis = 1)

#shuffle dataframe for randomness in splitting; the fixed seed keeps the row
#order (and everything derived from it) identical across kernel restarts
df = df.sample(frac = 1, random_state = 42)

In [4]:
# Preview the first 100 shuffled rows (the notebook renderer elides the middle of the table)
df.head(100)

Unnamed: 0,Happy,Angry,Disgusted,HandRaised,Sleep,HeadGaze,Engagement
594,0.237095,0.014371,0.748534,0,1,2,1
540,0.272071,0.048970,0.678959,0,1,2,1
903,0.055106,0.803097,0.141797,1,1,1,3
241,0.511740,0.214462,0.273798,0,1,1,1
282,0.470297,0.187755,0.341948,0,1,2,1
...,...,...,...,...,...,...,...
922,0.045606,0.049183,0.905211,0,0,1,1
603,0.232670,0.196629,0.570701,0,0,0,1
121,0.650005,0.276586,0.073409,1,0,2,3
972,0.015990,0.819369,0.164641,0,1,1,5


In [5]:
# Separate the first six columns (features) from the remaining column
# (the Engagement label) and convert both to numpy arrays.
feature_columns, label_columns = df.iloc[:, :6], df.iloc[:, 6:]
X = feature_columns.to_numpy()
Y = label_columns.to_numpy()

# Report the array shapes: features first, then labels
for array in (X, Y):
    print(array.shape)

(1000, 6)
(1000, 1)


In [6]:
#normalizing the data, will help with different ranges of values for different features
# NOTE: the scaler below is instantiated but never fitted or applied -- the
# fit_transform call is commented out -- so X is left unscaled and, despite
# its name, `scaled_df` holds the raw feature values.
min_max_scaler = preprocessing.MinMaxScaler()
# X = min_max_scaler.fit_transform(X)
scaled_df = pd.DataFrame(X)

# Preview the first five rows of the (unscaled) feature frame
scaled_df.head()

Unnamed: 0,0,1,2,3,4,5
0,0.237095,0.014371,0.748534,0.0,1.0,2.0
1,0.272071,0.04897,0.678959,0.0,1.0,2.0
2,0.055106,0.803097,0.141797,1.0,1.0,1.0
3,0.51174,0.214462,0.273798,0.0,1.0,1.0
4,0.470297,0.187755,0.341948,0.0,1.0,2.0


In [7]:
#apply one hot encoding to Y values
# The raw labels are read from `df` (the same slice `Y` was created from)
# instead of from `Y` itself, so re-running this cell does not one-hot encode
# an already-encoded matrix a second time.
ohe = OneHotEncoder()
Y = ohe.fit_transform(df.iloc[:, 6:].to_numpy()).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [8]:
# Inspect the one-hot encoded label matrix (one column per engagement class)
Y

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.]])

In [9]:
#Split the features and one-hot labels into a training set (80%) and a held-out test set (20%).
#The fixed random_state makes the partition reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(800, 6)
(200, 6)
(800, 5)
(200, 5)


In [10]:
#Building the model: a fully connected classifier with five hidden layers
#(50, 200, 200, 200, 200 units) followed by a softmax output layer

def create_model(n_cols = None, n_classes = 5):
    """Build the (uncompiled) engagement classifier.

    Every hidden block is Dense -> BatchNormalization -> ReLU -> Dropout(0.5).
    Only the first Dense layer carries an L2 activity regularizer.

    Parameters
    ----------
    n_cols : int, optional
        Number of input features. When omitted, the width of the
        notebook-level ``X_train`` array is used.
    n_classes : int, optional
        Number of units in the softmax output layer (default 5).

    Returns
    -------
    keras.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    if n_cols is None:
        n_cols = X_train.shape[1]

    # First hidden layer declares the input shape; the other four are identical
    hidden_layers = [Dense(50, activity_regularizer = l2(0.00001), input_shape = (n_cols,))]
    hidden_layers += [Dense(200) for _ in range(4)]

    model = keras.Sequential()
    for dense_layer in hidden_layers:
        model.add(dense_layer)
        #batch normalization layers need to be added before relu activation
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(0.5))

    model.add(Dense(n_classes, activation = 'softmax'))

    return model

In [11]:
#training the model
#test if model has any serious loss/accuracy problems
model = create_model()
model.compile(optimizer = Adam(lr = 0.000005), loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
#adding callbacks: halve the learning rate (down to min_lr) whenever val_loss
#has not improved for 50 consecutive epochs
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 50, min_lr = 1e-7, verbose = 1)

#fit() returns its own History object, so no explicit History callback is needed.
#validation_split = 0.2 holds out the last 20% of X_train/Y_train for validation.
history = model.fit(X_train, Y_train,
                    validation_split = 0.2,
                    epochs = 1000,
                    batch_size = 64,
                    verbose = 2,
                    callbacks = [reduce_lr])


Train on 640 samples, validate on 160 samples
Epoch 1/1000
 - 5s - loss: 2.1635 - categorical_accuracy: 0.2016 - val_loss: 1.6171 - val_categorical_accuracy: 0.2125
Epoch 2/1000
 - 0s - loss: 2.0759 - categorical_accuracy: 0.1937 - val_loss: 1.6179 - val_categorical_accuracy: 0.2375
Epoch 3/1000
 - 0s - loss: 2.1384 - categorical_accuracy: 0.2047 - val_loss: 1.6178 - val_categorical_accuracy: 0.2375
Epoch 4/1000
 - 0s - loss: 2.1174 - categorical_accuracy: 0.2125 - val_loss: 1.6197 - val_categorical_accuracy: 0.2375
Epoch 5/1000
 - 0s - loss: 2.1841 - categorical_accuracy: 0.2078 - val_loss: 1.6188 - val_categorical_accuracy: 0.2375
Epoch 6/1000
 - 0s - loss: 2.1673 - categorical_accuracy: 0.2109 - val_loss: 1.6190 - val_categorical_accuracy: 0.2375
Epoch 7/1000
 - 0s - loss: 2.0726 - categorical_accuracy: 0.2203 - val_loss: 1.6199 - val_categorical_accuracy: 0.2375
Epoch 8/1000
 - 0s - loss: 2.0354 - categorical_accuracy: 0.2328 - val_loss: 1.6216 - val_categorical_accuracy: 0.2375
E

Epoch 68/1000
 - 0s - loss: 1.9534 - categorical_accuracy: 0.2516 - val_loss: 1.5567 - val_categorical_accuracy: 0.2562
Epoch 69/1000
 - 0s - loss: 2.0044 - categorical_accuracy: 0.2359 - val_loss: 1.5548 - val_categorical_accuracy: 0.2562
Epoch 70/1000
 - 0s - loss: 1.9190 - categorical_accuracy: 0.2656 - val_loss: 1.5527 - val_categorical_accuracy: 0.2625
Epoch 71/1000
 - 0s - loss: 1.9461 - categorical_accuracy: 0.2438 - val_loss: 1.5518 - val_categorical_accuracy: 0.2750
Epoch 72/1000
 - 0s - loss: 1.9453 - categorical_accuracy: 0.2609 - val_loss: 1.5490 - val_categorical_accuracy: 0.2812
Epoch 73/1000
 - 0s - loss: 1.9476 - categorical_accuracy: 0.2625 - val_loss: 1.5462 - val_categorical_accuracy: 0.2875
Epoch 74/1000
 - 0s - loss: 1.9152 - categorical_accuracy: 0.2703 - val_loss: 1.5424 - val_categorical_accuracy: 0.2875
Epoch 75/1000
 - 0s - loss: 1.9068 - categorical_accuracy: 0.2656 - val_loss: 1.5407 - val_categorical_accuracy: 0.2875
Epoch 76/1000
 - 0s - loss: 1.9148 - cat

Epoch 136/1000
 - 0s - loss: 1.8307 - categorical_accuracy: 0.2922 - val_loss: 1.4685 - val_categorical_accuracy: 0.3250
Epoch 137/1000
 - 0s - loss: 1.7901 - categorical_accuracy: 0.3000 - val_loss: 1.4668 - val_categorical_accuracy: 0.3250
Epoch 138/1000
 - 0s - loss: 1.8049 - categorical_accuracy: 0.3313 - val_loss: 1.4654 - val_categorical_accuracy: 0.3250
Epoch 139/1000
 - 0s - loss: 1.8155 - categorical_accuracy: 0.3203 - val_loss: 1.4629 - val_categorical_accuracy: 0.3250
Epoch 140/1000
 - 0s - loss: 1.7438 - categorical_accuracy: 0.3031 - val_loss: 1.4622 - val_categorical_accuracy: 0.3250
Epoch 141/1000
 - 0s - loss: 1.8540 - categorical_accuracy: 0.2953 - val_loss: 1.4606 - val_categorical_accuracy: 0.3250
Epoch 142/1000
 - 0s - loss: 1.8010 - categorical_accuracy: 0.3031 - val_loss: 1.4600 - val_categorical_accuracy: 0.3250
Epoch 143/1000
 - 0s - loss: 1.7814 - categorical_accuracy: 0.3281 - val_loss: 1.4571 - val_categorical_accuracy: 0.3250
Epoch 144/1000
 - 0s - loss: 1.7

Epoch 204/1000
 - 0s - loss: 1.6758 - categorical_accuracy: 0.3313 - val_loss: 1.3895 - val_categorical_accuracy: 0.3250
Epoch 205/1000
 - 0s - loss: 1.7266 - categorical_accuracy: 0.3313 - val_loss: 1.3895 - val_categorical_accuracy: 0.3250
Epoch 206/1000
 - 0s - loss: 1.6577 - categorical_accuracy: 0.3625 - val_loss: 1.3876 - val_categorical_accuracy: 0.3250
Epoch 207/1000
 - 0s - loss: 1.6881 - categorical_accuracy: 0.3313 - val_loss: 1.3855 - val_categorical_accuracy: 0.3250
Epoch 208/1000
 - 0s - loss: 1.7351 - categorical_accuracy: 0.3250 - val_loss: 1.3848 - val_categorical_accuracy: 0.3250
Epoch 209/1000
 - 0s - loss: 1.6472 - categorical_accuracy: 0.3438 - val_loss: 1.3817 - val_categorical_accuracy: 0.3250
Epoch 210/1000
 - 0s - loss: 1.7619 - categorical_accuracy: 0.3203 - val_loss: 1.3808 - val_categorical_accuracy: 0.3250
Epoch 211/1000
 - 0s - loss: 1.6762 - categorical_accuracy: 0.3516 - val_loss: 1.3799 - val_categorical_accuracy: 0.3250
Epoch 212/1000
 - 0s - loss: 1.7

Epoch 272/1000
 - 0s - loss: 1.5251 - categorical_accuracy: 0.4016 - val_loss: 1.3158 - val_categorical_accuracy: 0.3250
Epoch 273/1000
 - 0s - loss: 1.6271 - categorical_accuracy: 0.3984 - val_loss: 1.3170 - val_categorical_accuracy: 0.3250
Epoch 274/1000
 - 0s - loss: 1.6546 - categorical_accuracy: 0.3750 - val_loss: 1.3165 - val_categorical_accuracy: 0.3250
Epoch 275/1000
 - 0s - loss: 1.6006 - categorical_accuracy: 0.4094 - val_loss: 1.3128 - val_categorical_accuracy: 0.3250
Epoch 276/1000
 - 0s - loss: 1.6180 - categorical_accuracy: 0.3812 - val_loss: 1.3102 - val_categorical_accuracy: 0.3250
Epoch 277/1000
 - 0s - loss: 1.5981 - categorical_accuracy: 0.4000 - val_loss: 1.3074 - val_categorical_accuracy: 0.3250
Epoch 278/1000
 - 0s - loss: 1.6414 - categorical_accuracy: 0.3688 - val_loss: 1.3077 - val_categorical_accuracy: 0.3250
Epoch 279/1000
 - 0s - loss: 1.6184 - categorical_accuracy: 0.3984 - val_loss: 1.3060 - val_categorical_accuracy: 0.3250
Epoch 280/1000
 - 0s - loss: 1.6

Epoch 340/1000
 - 0s - loss: 1.5417 - categorical_accuracy: 0.4172 - val_loss: 1.2535 - val_categorical_accuracy: 0.4250
Epoch 341/1000
 - 0s - loss: 1.5099 - categorical_accuracy: 0.4375 - val_loss: 1.2542 - val_categorical_accuracy: 0.4250
Epoch 342/1000
 - 0s - loss: 1.5817 - categorical_accuracy: 0.4047 - val_loss: 1.2512 - val_categorical_accuracy: 0.4250
Epoch 343/1000
 - 0s - loss: 1.4940 - categorical_accuracy: 0.4141 - val_loss: 1.2481 - val_categorical_accuracy: 0.4250
Epoch 344/1000
 - 0s - loss: 1.5597 - categorical_accuracy: 0.3984 - val_loss: 1.2474 - val_categorical_accuracy: 0.4250
Epoch 345/1000
 - 0s - loss: 1.5634 - categorical_accuracy: 0.4094 - val_loss: 1.2469 - val_categorical_accuracy: 0.4250
Epoch 346/1000
 - 0s - loss: 1.5553 - categorical_accuracy: 0.4172 - val_loss: 1.2444 - val_categorical_accuracy: 0.4313
Epoch 347/1000
 - 0s - loss: 1.5174 - categorical_accuracy: 0.4187 - val_loss: 1.2448 - val_categorical_accuracy: 0.4313
Epoch 348/1000
 - 0s - loss: 1.5

Epoch 408/1000
 - 0s - loss: 1.5365 - categorical_accuracy: 0.4266 - val_loss: 1.2081 - val_categorical_accuracy: 0.4875
Epoch 409/1000
 - 0s - loss: 1.4604 - categorical_accuracy: 0.4406 - val_loss: 1.2082 - val_categorical_accuracy: 0.4938
Epoch 410/1000
 - 0s - loss: 1.5386 - categorical_accuracy: 0.4375 - val_loss: 1.2072 - val_categorical_accuracy: 0.5000
Epoch 411/1000
 - 0s - loss: 1.5057 - categorical_accuracy: 0.4234 - val_loss: 1.2061 - val_categorical_accuracy: 0.5250
Epoch 412/1000
 - 0s - loss: 1.5657 - categorical_accuracy: 0.4359 - val_loss: 1.2055 - val_categorical_accuracy: 0.5250
Epoch 413/1000
 - 0s - loss: 1.4752 - categorical_accuracy: 0.4563 - val_loss: 1.2052 - val_categorical_accuracy: 0.5312
Epoch 414/1000
 - 0s - loss: 1.4507 - categorical_accuracy: 0.4656 - val_loss: 1.2069 - val_categorical_accuracy: 0.5250
Epoch 415/1000
 - 0s - loss: 1.5600 - categorical_accuracy: 0.4203 - val_loss: 1.2061 - val_categorical_accuracy: 0.5312
Epoch 416/1000
 - 0s - loss: 1.4

Epoch 476/1000
 - 0s - loss: 1.5105 - categorical_accuracy: 0.4344 - val_loss: 1.1660 - val_categorical_accuracy: 0.5813
Epoch 477/1000
 - 0s - loss: 1.4476 - categorical_accuracy: 0.4578 - val_loss: 1.1664 - val_categorical_accuracy: 0.5813
Epoch 478/1000
 - 0s - loss: 1.4404 - categorical_accuracy: 0.4688 - val_loss: 1.1655 - val_categorical_accuracy: 0.5813
Epoch 479/1000
 - 0s - loss: 1.5550 - categorical_accuracy: 0.4734 - val_loss: 1.1631 - val_categorical_accuracy: 0.5813
Epoch 480/1000
 - 0s - loss: 1.4811 - categorical_accuracy: 0.4516 - val_loss: 1.1620 - val_categorical_accuracy: 0.5813
Epoch 481/1000
 - 0s - loss: 1.4729 - categorical_accuracy: 0.4453 - val_loss: 1.1610 - val_categorical_accuracy: 0.5813
Epoch 482/1000
 - 0s - loss: 1.4849 - categorical_accuracy: 0.4641 - val_loss: 1.1621 - val_categorical_accuracy: 0.5813
Epoch 483/1000
 - 0s - loss: 1.4971 - categorical_accuracy: 0.4641 - val_loss: 1.1612 - val_categorical_accuracy: 0.5813
Epoch 484/1000
 - 0s - loss: 1.4

Epoch 544/1000
 - 0s - loss: 1.4375 - categorical_accuracy: 0.4594 - val_loss: 1.1313 - val_categorical_accuracy: 0.5813
Epoch 545/1000
 - 0s - loss: 1.4639 - categorical_accuracy: 0.4750 - val_loss: 1.1298 - val_categorical_accuracy: 0.5813
Epoch 546/1000
 - 0s - loss: 1.4948 - categorical_accuracy: 0.4781 - val_loss: 1.1289 - val_categorical_accuracy: 0.5813
Epoch 547/1000
 - 0s - loss: 1.4574 - categorical_accuracy: 0.4563 - val_loss: 1.1290 - val_categorical_accuracy: 0.5813
Epoch 548/1000
 - 0s - loss: 1.4529 - categorical_accuracy: 0.4875 - val_loss: 1.1291 - val_categorical_accuracy: 0.5813
Epoch 549/1000
 - 0s - loss: 1.4422 - categorical_accuracy: 0.4922 - val_loss: 1.1294 - val_categorical_accuracy: 0.5813
Epoch 550/1000
 - 0s - loss: 1.4346 - categorical_accuracy: 0.4734 - val_loss: 1.1297 - val_categorical_accuracy: 0.5813
Epoch 551/1000
 - 0s - loss: 1.4422 - categorical_accuracy: 0.4625 - val_loss: 1.1297 - val_categorical_accuracy: 0.5813
Epoch 552/1000
 - 0s - loss: 1.4

Epoch 612/1000
 - 0s - loss: 1.4159 - categorical_accuracy: 0.4688 - val_loss: 1.0990 - val_categorical_accuracy: 0.5938
Epoch 613/1000
 - 0s - loss: 1.4413 - categorical_accuracy: 0.4906 - val_loss: 1.0984 - val_categorical_accuracy: 0.6000
Epoch 614/1000
 - 0s - loss: 1.4742 - categorical_accuracy: 0.4609 - val_loss: 1.0990 - val_categorical_accuracy: 0.6000
Epoch 615/1000
 - 0s - loss: 1.4137 - categorical_accuracy: 0.4812 - val_loss: 1.0993 - val_categorical_accuracy: 0.5938
Epoch 616/1000
 - 0s - loss: 1.3805 - categorical_accuracy: 0.5063 - val_loss: 1.0997 - val_categorical_accuracy: 0.6062
Epoch 617/1000
 - 0s - loss: 1.3830 - categorical_accuracy: 0.5109 - val_loss: 1.0991 - val_categorical_accuracy: 0.6000
Epoch 618/1000
 - 0s - loss: 1.4135 - categorical_accuracy: 0.4859 - val_loss: 1.0989 - val_categorical_accuracy: 0.6000
Epoch 619/1000
 - 0s - loss: 1.3736 - categorical_accuracy: 0.5141 - val_loss: 1.0988 - val_categorical_accuracy: 0.6000
Epoch 620/1000
 - 0s - loss: 1.3

Epoch 680/1000
 - 0s - loss: 1.3980 - categorical_accuracy: 0.5078 - val_loss: 1.0731 - val_categorical_accuracy: 0.6250
Epoch 681/1000
 - 0s - loss: 1.3841 - categorical_accuracy: 0.4734 - val_loss: 1.0737 - val_categorical_accuracy: 0.6125
Epoch 682/1000
 - 0s - loss: 1.3916 - categorical_accuracy: 0.4859 - val_loss: 1.0751 - val_categorical_accuracy: 0.6125
Epoch 683/1000
 - 0s - loss: 1.3891 - categorical_accuracy: 0.4906 - val_loss: 1.0748 - val_categorical_accuracy: 0.6062
Epoch 684/1000
 - 0s - loss: 1.3962 - categorical_accuracy: 0.4828 - val_loss: 1.0751 - val_categorical_accuracy: 0.6062
Epoch 685/1000
 - 0s - loss: 1.4319 - categorical_accuracy: 0.4859 - val_loss: 1.0755 - val_categorical_accuracy: 0.6000
Epoch 686/1000
 - 0s - loss: 1.4148 - categorical_accuracy: 0.4703 - val_loss: 1.0734 - val_categorical_accuracy: 0.6062
Epoch 687/1000
 - 0s - loss: 1.2804 - categorical_accuracy: 0.5063 - val_loss: 1.0731 - val_categorical_accuracy: 0.6062
Epoch 688/1000
 - 0s - loss: 1.3

Epoch 748/1000
 - 0s - loss: 1.3958 - categorical_accuracy: 0.4922 - val_loss: 1.0618 - val_categorical_accuracy: 0.6000
Epoch 749/1000
 - 0s - loss: 1.3723 - categorical_accuracy: 0.5094 - val_loss: 1.0603 - val_categorical_accuracy: 0.6000
Epoch 750/1000
 - 0s - loss: 1.3308 - categorical_accuracy: 0.5016 - val_loss: 1.0599 - val_categorical_accuracy: 0.6000
Epoch 751/1000
 - 0s - loss: 1.3343 - categorical_accuracy: 0.5375 - val_loss: 1.0605 - val_categorical_accuracy: 0.6000
Epoch 752/1000
 - 0s - loss: 1.3555 - categorical_accuracy: 0.5094 - val_loss: 1.0599 - val_categorical_accuracy: 0.6000
Epoch 753/1000
 - 0s - loss: 1.3616 - categorical_accuracy: 0.5234 - val_loss: 1.0592 - val_categorical_accuracy: 0.6000
Epoch 754/1000
 - 0s - loss: 1.3391 - categorical_accuracy: 0.4953 - val_loss: 1.0604 - val_categorical_accuracy: 0.6000
Epoch 755/1000
 - 0s - loss: 1.3950 - categorical_accuracy: 0.4750 - val_loss: 1.0605 - val_categorical_accuracy: 0.6000
Epoch 756/1000
 - 0s - loss: 1.3

Epoch 816/1000
 - 0s - loss: 1.3067 - categorical_accuracy: 0.5125 - val_loss: 1.0411 - val_categorical_accuracy: 0.6187
Epoch 817/1000
 - 0s - loss: 1.3572 - categorical_accuracy: 0.4875 - val_loss: 1.0407 - val_categorical_accuracy: 0.6187
Epoch 818/1000
 - 0s - loss: 1.3713 - categorical_accuracy: 0.4891 - val_loss: 1.0399 - val_categorical_accuracy: 0.6125
Epoch 819/1000
 - 0s - loss: 1.2920 - categorical_accuracy: 0.5266 - val_loss: 1.0411 - val_categorical_accuracy: 0.6062
Epoch 820/1000
 - 0s - loss: 1.3204 - categorical_accuracy: 0.4891 - val_loss: 1.0412 - val_categorical_accuracy: 0.6062
Epoch 821/1000
 - 0s - loss: 1.3598 - categorical_accuracy: 0.5219 - val_loss: 1.0409 - val_categorical_accuracy: 0.6062
Epoch 822/1000
 - 0s - loss: 1.2847 - categorical_accuracy: 0.5297 - val_loss: 1.0424 - val_categorical_accuracy: 0.6000
Epoch 823/1000
 - 0s - loss: 1.3614 - categorical_accuracy: 0.4953 - val_loss: 1.0421 - val_categorical_accuracy: 0.6000
Epoch 824/1000
 - 0s - loss: 1.3

Epoch 884/1000
 - 0s - loss: 1.3834 - categorical_accuracy: 0.5109 - val_loss: 1.0223 - val_categorical_accuracy: 0.6187
Epoch 885/1000
 - 0s - loss: 1.3564 - categorical_accuracy: 0.5109 - val_loss: 1.0218 - val_categorical_accuracy: 0.6187
Epoch 886/1000
 - 0s - loss: 1.3355 - categorical_accuracy: 0.5047 - val_loss: 1.0194 - val_categorical_accuracy: 0.6187
Epoch 887/1000
 - 0s - loss: 1.3388 - categorical_accuracy: 0.5375 - val_loss: 1.0192 - val_categorical_accuracy: 0.6187
Epoch 888/1000
 - 0s - loss: 1.2699 - categorical_accuracy: 0.5094 - val_loss: 1.0199 - val_categorical_accuracy: 0.6187
Epoch 889/1000
 - 0s - loss: 1.2508 - categorical_accuracy: 0.5344 - val_loss: 1.0182 - val_categorical_accuracy: 0.6250
Epoch 890/1000
 - 0s - loss: 1.3166 - categorical_accuracy: 0.5156 - val_loss: 1.0177 - val_categorical_accuracy: 0.6250
Epoch 891/1000
 - 0s - loss: 1.2983 - categorical_accuracy: 0.4953 - val_loss: 1.0188 - val_categorical_accuracy: 0.6250
Epoch 892/1000
 - 0s - loss: 1.2

Epoch 952/1000
 - 0s - loss: 1.3467 - categorical_accuracy: 0.5188 - val_loss: 1.0056 - val_categorical_accuracy: 0.6187
Epoch 953/1000
 - 0s - loss: 1.3503 - categorical_accuracy: 0.5078 - val_loss: 1.0052 - val_categorical_accuracy: 0.6125
Epoch 954/1000
 - 0s - loss: 1.3628 - categorical_accuracy: 0.5047 - val_loss: 1.0058 - val_categorical_accuracy: 0.6062
Epoch 955/1000
 - 0s - loss: 1.2997 - categorical_accuracy: 0.5375 - val_loss: 1.0064 - val_categorical_accuracy: 0.6125
Epoch 956/1000
 - 0s - loss: 1.3677 - categorical_accuracy: 0.4891 - val_loss: 1.0069 - val_categorical_accuracy: 0.6000
Epoch 957/1000
 - 0s - loss: 1.2744 - categorical_accuracy: 0.5328 - val_loss: 1.0077 - val_categorical_accuracy: 0.6000
Epoch 958/1000
 - 0s - loss: 1.3427 - categorical_accuracy: 0.5047 - val_loss: 1.0078 - val_categorical_accuracy: 0.6062
Epoch 959/1000
 - 0s - loss: 1.3429 - categorical_accuracy: 0.5234 - val_loss: 1.0075 - val_categorical_accuracy: 0.6062
Epoch 960/1000
 - 0s - loss: 1.3

In [12]:
# Accuracy on the data the model was fitted/validated on (an optimistic estimate)
scores = model.evaluate(X_train, Y_train, batch_size=30)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Accuracy on the held-out test split, which was not passed to fit()
test_scores = model.evaluate(X_test, Y_test, batch_size=30)
print("test %s: %.2f%%" % (model.metrics_names[1], test_scores[1]*100))

categorical_accuracy: 61.37%


In [None]:
# Persist the network architecture as a JSON document
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

# Persist the trained weights separately in HDF5 format
model.save_weights("model.h5")
print("Saved model to disk")

In [16]:
# Show the first held-out sample (kept 2-D by slicing, as predict() expects a batch)
X_test[:1]

array([[0.18662306, 0.05074724, 0.81522049, 0.        , 0.        ,
        0.        ]])

In [27]:
print(X_test[:1])
# predict_classes() only exists on older Sequential models; taking the argmax of
# the softmax output gives the same class index. The index is then mapped back
# to the original Engagement label through the fitted encoder instead of
# assuming that labels are always exactly index + 1.
class_index = np.argmax(model.predict(X_test[:1], verbose = 1), axis = -1)
predictions = ohe.categories_[0][class_index]
print()
print(predictions)

[[0.18662306 0.05074724 0.81522049 0.         0.         0.        ]]

[1]


In [None]:
print(X_test[:1])
# predict_classes() only exists on older Sequential models; taking the argmax of
# the softmax output gives the same class index. The index is then mapped back
# to the original Engagement label through the fitted encoder instead of
# assuming that labels are always exactly index + 1.
class_index = np.argmax(model.predict(X_test[:1], verbose = 1), axis = -1)
predictions = ohe.categories_[0][class_index]
print()
print(predictions)

In [None]:
import itertools
import time

POLL_INTERVAL_SEC = 5   # seconds between the start of consecutive predictions
MAX_POLLS = 10          # set to None to keep polling until the kernel is interrupted

i = 0

def hello(model):
    """Print the running interval counter and the model output for the first test row.

    Parameters
    ----------
    model : object exposing ``predict``
        Called as ``model.predict(X_test[:1], verbose = 1)``.

    Side effects: prints to stdout and increments the notebook-level counter ``i``.
    """
    global i
    print("read csv, Interval {}".format(i))
    predictions = model.predict(X_test[:1], verbose = 1)
    print(predictions)
    i = i + 1

# Fixed-rate schedule: each deadline is derived from the previous deadline rather
# than from "now", so the time spent inside hello() does not accumulate as drift.
# The loop is bounded by MAX_POLLS so that Restart & Run All can reach later cells.
nexttime = time.time()
poll_numbers = itertools.count() if MAX_POLLS is None else range(MAX_POLLS)
for _ in poll_numbers:
    hello(model)          # take t sec
    nexttime += POLL_INTERVAL_SEC
    sleeptime = nexttime - time.time()
    if sleeptime > 0:
        time.sleep(sleeptime)

### Comparing the effects of training the model on various subsamples of the data

In [None]:
result_dict = {}
subset_data_list = [0.2, 0.4, 0.6, 0.8]

# CSVLogger opens its target file directly, so the log directory must exist beforehand
log_dir = os.path.join(rootPath, 'logs')
os.makedirs(log_dir, exist_ok = True)

for subset_data in subset_data_list:
    #training the model on a proportion of data and save the loss and accuracy

    # NOTE: this rebinds the notebook-level `model`; after the loop it refers to
    # the model trained in the final iteration.
    model = create_model()
    model.compile(optimizer = Adam(lr = 0.000005), loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])

    #adding callbacks
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor=0.5,
                                  patience=50, min_lr=1e-7, verbose = 1)
    logger = CSVLogger(os.path.join(log_dir, 'log_results-{}.csv'.format(subset_data)))

    #training the model with subset of data: validation_split holds out the
    #trailing (1 - subset_data) share of X_train, so only the leading
    #subset_data share is used for fitting. fit() returns its own History
    #object, so no explicit History callback is needed.
    history = model.fit(X_train, Y_train,
                        validation_split = 1 - subset_data,
                        epochs = 1000,
                        batch_size = 64,
                        verbose = 2,
                        callbacks = [reduce_lr, logger])

    # Keep the per-epoch curves of this run, keyed by the training-data fraction
    result_dict[subset_data] = {
        'training_loss' : history.history['loss'],
        'validation_loss': history.history['val_loss'],
        'training_accuracy': history.history['categorical_accuracy'],
        'validation_accuracy': history.history['val_categorical_accuracy']
    }

In [None]:
def plotGraph(result_dict, plot_var):
    """Plot one recorded metric against the training epoch, one line per run.

    Parameters
    ----------
    result_dict : dict
        Maps a run label (the training-data fraction) to a dict of per-epoch
        metric sequences.
    plot_var : str
        Key of the metric to draw, e.g. ``'training_loss'``.

    Returns
    -------
    The axes object returned by ``sns.lineplot`` for the last run drawn.

    Raises
    ------
    ValueError
        If ``result_dict`` is empty, since there is nothing to plot.
    """
    if not result_dict:
        raise ValueError("result_dict is empty: nothing to plot")

    for subset_data, run_metrics in result_dict.items():
        values = run_metrics[plot_var]
        # The x axis follows the recorded history length instead of assuming 1000 epochs
        ax = sns.lineplot(x = np.arange(len(values)),
                          y = values, label = subset_data)
        ax.set(xlabel = 'training epoch', ylabel = plot_var)

    if plot_var == "training_loss":
        #rescale for easier comparison
        ax.set_ylim(1, 2.5)

    ax.set_title('{} with epoch'.format(plot_var))
    ax.legend()

    return ax

In [None]:
# Training loss per epoch, one line per training-data fraction
plotGraph(result_dict, 'training_loss')

In [None]:
# Validation loss per epoch, one line per training-data fraction
plotGraph(result_dict,'validation_loss')

In [None]:
# Training accuracy per epoch, one line per training-data fraction
plotGraph(result_dict,'training_accuracy')

In [None]:
# Validation accuracy per epoch, one line per training-data fraction
plotGraph(result_dict,'validation_accuracy')

In [None]:
# Softmax outputs of the model for the first five held-out samples.
# NOTE(review): `model` is whichever model was bound last; if the subset-comparison
# loop earlier in the notebook has run, that is the model from its final
# iteration -- confirm this is the intended model.
predictions = model.predict(X_test[:5], verbose = 1)
# predictions = model.predict_classes(X_test[:50], verbose=1)

In [None]:
# Show the first five held-out feature rows
print(X_test[:5])

In [None]:
# Show the stored prediction array
print(predictions)