In [1]:
import keras
import sys
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Reshape, Conv2D, Flatten, MaxPooling2D, LSTM
from keras.optimizers import Adam, SGD
from sklearn.model_selection import StratifiedKFold
from scipy import stats

from keras import backend as K

sys.path.append('../../data_handlers/')

from data_handler_MNIST import MNISTDataHandler

Using TensorFlow backend.


In [2]:
def get_compiled_model(test_model, shape):
    """Build a fresh model with ``test_model(shape)`` and compile it.

    The Keras backend session is cleared before anything is created, so
    every call starts from an empty graph.

    Args:
        test_model: callable taking an input shape and returning an
            uncompiled Keras model (e.g. ``model1``).
        shape: input shape forwarded unchanged to ``test_model``.

    Returns:
        The model returned by ``test_model``, compiled with Adam
        (lr=0.001, beta_1=0.5), categorical cross-entropy loss and the
        accuracy metric.
    """
    #Throw away whatever graph the previous model left behind
    K.clear_session()

    #Every candidate architecture is trained with the same optimizer settings
    adam = Adam(beta_1=0.5, lr=0.001)

    net = test_model(shape)
    net.compile(
        loss="categorical_crossentropy",
        metrics=["accuracy"],
        optimizer=adam,
    )
    return net

In [3]:
def model1(input_shape):
    """Return an uncompiled network: one 80-unit tanh layer, then a 10-way softmax.

    Args:
        input_shape: shape tuple of a single input sample, passed to the
            first Dense layer.
    """
    #Layers are handed to Sequential in the order they are applied
    return Sequential([
        Dense(
            80,
            input_shape=input_shape,
            activation='tanh',
            kernel_initializer='glorot_normal',
            name='fc1',
        ),
        Dense(10, activation='softmax', name='out'),
    ])

def model2(input_shape):
    """Return an uncompiled network: four relu layers (64, 64, 56, 56), then a 10-way softmax.

    Args:
        input_shape: shape tuple of a single input sample, passed to the
            first Dense layer only.
    """
    #(units, layer name) for each hidden layer, in stacking order
    hidden_spec = [(64, 'fc1'), (64, 'fc2'), (56, 'fc3'), (56, 'fc4')]

    net = Sequential()
    for position, (units, layer_name) in enumerate(hidden_spec):
        #Only the very first layer needs to be told the input shape
        shape_kwargs = {'input_shape': input_shape} if position == 0 else {}
        net.add(Dense(
            units,
            activation='relu',
            kernel_initializer='glorot_normal',
            name=layer_name,
            **shape_kwargs
        ))

    net.add(Dense(10, activation='softmax', name='out'))
    return net

def model3(input_shape):
    """Return an uncompiled network: one 24-unit tanh layer, then a 10-way softmax.

    Args:
        input_shape: shape tuple of a single input sample, passed to the
            first Dense layer.
    """
    hidden = Dense(
        24,
        input_shape=input_shape,
        activation='tanh',
        kernel_initializer='glorot_normal',
        name='fc1',
    )
    output = Dense(10, activation='softmax', name='out')

    #Stack the two layers in application order
    return Sequential([hidden, output])

In [4]:
k = 10  #For 10-fold cross validation
cv_seed = 42  #Fixed seed: without it the shuffled folds (and every metric below) change on each run


dHandler_mnist = MNISTDataHandler()
dHandler_mnist.load_data(verbose = 1)

#StratifiedKFold needs integer class ids, so collapse each label row to the index of its 1 entry.
#NOTE(review): assumes y_train rows are one-hot (the previous code looked up the first entry equal to 1) - confirm
y_multiclass = np.argmax(dHandler_mnist.y_train, axis=1)

#Materialise the (train_idx, val_idx) pairs once so every model is scored on identical folds
folds = list(
    StratifiedKFold(n_splits=k, shuffle=True, random_state=cv_seed)
    .split(dHandler_mnist.X_train, y_multiclass)
)

#Quick sanity check of the loaded arrays and of the first fold's sizes
print(dHandler_mnist.X_train.shape)
print(dHandler_mnist.X_train[:5])
print(dHandler_mnist.X_test.shape)
print(dHandler_mnist.X_test[:5])
print(folds[0][0].shape)
print(folds[0][1].shape)

Loading data. Cros-Validation ratio 0
(60000, 784)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(10000, 784)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(53994,)
(6006,)


In [5]:
#Flat result lists, filled model by model and fold by fold.
#Each entry is whatever model.evaluate returns for that fold.
evaluations_cv = []
evaluations_test = []
models = [model1, model2, model3]

for kModel in models:

    print('Validation on model')

    #This first instance exists only so the architecture can be printed
    input_shape = (784,)
    model = get_compiled_model(kModel, input_shape)
    model.summary()

    for j, fold in enumerate(folds):
        train_idx, val_idx = fold

        print('\nFold ', j)

        #Carve this fold's training and validation parts out of the training set
        X_train_cv, y_train_cv = dHandler_mnist.X_train[train_idx], dHandler_mnist.y_train[train_idx]
        X_valid_cv, y_valid_cv = dHandler_mnist.X_train[val_idx], dHandler_mnist.y_train[val_idx]

        #Every fold starts from a freshly built and compiled model
        input_shape = (784,)
        model = get_compiled_model(kModel, input_shape)
        model.fit(X_train_cv, y_train_cv, epochs=50, batch_size=512, verbose=0)

        #Score on the held-out fold first, then on the separate test set
        evaluation_cv = model.evaluate(X_valid_cv, y_valid_cv)
        evaluation_test = model.evaluate(dHandler_mnist.X_test, dHandler_mnist.y_test)

        evaluations_cv.append(evaluation_cv)
        evaluations_test.append(evaluation_test)

Validation on model
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 80)                62800     
_________________________________________________________________
out (Dense)                  (None, 10)                810       
Total params: 63,610
Trainable params: 63,610
Non-trainable params: 0
_________________________________________________________________

Fold  0

Fold  1

Fold  2

Fold  3

Fold  4

Fold  5

Fold  6

Fold  7

Fold  8

Fold  9
Validation on model
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 64)                50240     
_________________________________________________________________
fc2 (Dense)                  (None, 64)                4160      
_________________________________________________________________
fc3 (Den

In [6]:
#Summary statistics of the per-fold evaluations, for the CV folds and for the test set


def _split_evaluations(evaluations, n_folds):
    """Split a flat list of evaluations into one chunk and one array per model.

    The list is expected to hold ``n_folds`` consecutive entries per model,
    each entry being a pair of values (the arrays are built with two columns).

    Args:
        evaluations: flat list of two-value evaluation results.
        n_folds: number of consecutive entries that belong to one model.

    Returns:
        ``(chunks, arrays)``: ``chunks[i]`` is the list slice for the i-th
        model and ``arrays[i]`` holds the same values as an
        ``(n_folds, 2)`` float array.

    Raises:
        ValueError: if ``n_folds`` is not positive, or the list is empty or
            its length is not a multiple of ``n_folds``.
    """
    total = len(evaluations)
    if n_folds <= 0 or total == 0 or total % n_folds != 0:
        raise ValueError(
            "expected a non-empty multiple of %d evaluations, got %d" % (n_folds, total)
        )

    #Slice by the fold count instead of hard-coded 10/20 so changing k keeps models separated
    chunks = [evaluations[start:start + n_folds] for start in range(0, total, n_folds)]
    arrays = [np.array(chunk, dtype=float).reshape(n_folds, 2) for chunk in chunks]
    return chunks, arrays


def _print_model_summaries(arrays):
    """Print a "Model <n>" heading and ``stats.describe`` output for each array."""
    for number, per_fold in enumerate(arrays, start=1):
        print("Model %d" % number)
        print(stats.describe(per_fold))


#CV evaluations

print("k-fold CV\n\n")

_cv_chunks, _cv_arrays = _split_evaluations(evaluations_cv, k)

#Keep the per-model names available; the unpacking fails loudly unless exactly three models were evaluated
evaluations_cv_model1, evaluations_cv_model2, evaluations_cv_model3 = _cv_chunks
evaluations_cv_model1_np, evaluations_cv_model2_np, evaluations_cv_model3_np = _cv_arrays

_print_model_summaries(_cv_arrays)

#Test evaluations

print("\n\nTest results\n\n")

_test_chunks, _test_arrays = _split_evaluations(evaluations_test, k)

evaluations_test_model1, evaluations_test_model2, evaluations_test_model3 = _test_chunks
evaluations_test_model1_np, evaluations_test_model2_np, evaluations_test_model3_np = _test_arrays

_print_model_summaries(_test_arrays)

k-fold CV


Model 1
DescribeResult(nobs=10, minmax=(array([0.08345238, 0.96881254]), array([0.10745787, 0.97533333])), mean=array([0.09506876, 0.97271662]), variance=array([7.97859795e-05, 3.82503005e-06]), skewness=array([ 0.29990743, -0.71142964]), kurtosis=array([-1.34864008, -0.32419061]))
Model 2
DescribeResult(nobs=10, minmax=(array([0.13095074, 0.97098065]), array([0.18375098, 0.97699617])), mean=array([0.15142985, 0.97484998]), variance=array([2.66089864e-04, 3.62935840e-06]), skewness=array([ 0.60654272, -0.71552024]), kurtosis=array([-0.55766768, -0.34554427]))
Model 3
DescribeResult(nobs=10, minmax=(array([0.1296271 , 0.95066667]), array([0.17914387, 0.95949325])), mean=array([0.15461241, 0.95438307]), variance=array([2.34231059e-04, 7.92957249e-06]), skewness=array([-0.12445648,  0.47065563]), kurtosis=array([-1.00787196, -0.81265158]))


Test results


Model 1
DescribeResult(nobs=10, minmax=(array([0.08384685, 0.9725    ]), array([0.09121828, 0.9767    ])), mean=array([0.0