In [1]:
import keras
import sys
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Reshape, Conv2D, Flatten, MaxPooling2D, LSTM
from keras.optimizers import Adam, SGD
from sklearn.model_selection import StratifiedKFold
from scipy import stats

from keras import backend as K

sys.path.append('/Users/davidlaredorazo/Documents/University_of_California/Research/Projects')

from ann_framework.data_handlers.data_handler_MNIST import MNISTDataHandler

Using TensorFlow backend.


In [2]:
def get_compiled_model(test_model, shape):
    
    K.clear_session()  #Clear the previous tensorflow graph
    
    #Shared parameters for the models
    optimizer = Adam(lr=0.001,beta_1=0.5)
    
    lossFunction = "categorical_crossentropy"
    metrics = ["accuracy"]
    model = None

    #Create and compile the models
    model = test_model(shape)
    model.compile(optimizer = optimizer, loss = lossFunction, metrics = metrics)

    return model

In [3]:
def model1(input_shape):
    
    #Create a sequential model
    model = Sequential()
    
    #Add the layers for the model
    model.add(Dense(80, input_shape=input_shape, activation='tanh', kernel_initializer='glorot_normal', name='fc1'))
    model.add(Dense(10, activation='softmax', name='out'))
    
    return model

def modelCNN(input_shape):
    
    #Create a sequential model
    model = Sequential()
    
    model.add(Conv2D(10, kernel_size=(11,11), strides=(2,2), input_shape=input_shape, padding='valid', name='conv1'))
    model.add(MaxPooling2D(pool_size=(4, 4), padding='valid', name='pool1'))
    model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
    model.add(Dense(10,activation='softmax', name='out'))
    
    return model

def model3(input_shape):
    
    #Create a sequential model
    model = Sequential()
    
    #Add the layers for the model
    model.add(Dense(24, input_shape=input_shape, activation='tanh', kernel_initializer='glorot_normal', name='fc1'))
    model.add(Dense(10, activation='softmax', name='out'))
    
    return model

In [4]:
k = 2  #For 10-fold cross validation


dHandler_mnist = MNISTDataHandler()
dHandler_mnist.load_data(verbose = 1)

y_multiclass = [np.where(r==1)[0][0] for r in dHandler_mnist.y_train]

folds = list(StratifiedKFold(n_splits=k, shuffle=True).split(dHandler_mnist.X_train, y_multiclass))

print(dHandler_mnist.X_train.shape)
print(dHandler_mnist.X_train[:5])
print(dHandler_mnist.X_test.shape)
print(dHandler_mnist.X_test[:5])
print(folds[0][0].shape)
print(folds[0][1].shape)

Loading data. Cross-Validation ratio 0
(60000, 28, 28, 1)
[[[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0

In [5]:
evaluations_cv = list()
evaluations_test = list()
#models = [model1, model2, model3]
models = [modelCNN]

for kModel in models:
    
    print('Validation on model')
    #input_shape = (784,)
    input_shape = (28,28,1)
    model = get_compiled_model(kModel, input_shape)
    model.summary()
    
    
    for j, (train_idx, val_idx) in enumerate(folds):

        print('\nFold ', j)

        X_train_cv = dHandler_mnist.X_train[train_idx]
        y_train_cv = dHandler_mnist.y_train[train_idx]
        X_valid_cv = dHandler_mnist.X_train[val_idx]
        y_valid_cv = dHandler_mnist.y_train[val_idx]

        #input_shape = (784,)
        input_shape = (28,28,1)
        model = get_compiled_model(kModel, input_shape)

        model.fit(X_train_cv, y_train_cv, batch_size=512, epochs=1, verbose=1)
        
        evaluation_cv = model.evaluate(X_valid_cv, y_valid_cv)
        evaluation_test = model.evaluate(dHandler_mnist.X_test, dHandler_mnist.y_test)

        evaluations_cv.append(evaluation_cv)
        evaluations_test.append(evaluation_test)

Validation on model
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 9, 9, 10)          1220      
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 2, 2, 10)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 40)                0         
_________________________________________________________________
out (Dense)                  (None, 10)                410       
Total params: 1,630
Trainable params: 1,630
Non-trainable params: 0
_________________________________________________________________

Fold  0
Epoch 1/1

Fold  1
Epoch 1/1


In [6]:
#CV evaluations

print("k-fold CV\n\n")

evaluations_cv_model1 = evaluations_cv[:10]
evaluations_cv_model2 = evaluations_cv[10:20]
evaluations_cv_model3 = evaluations_cv[20:]

evaluations_cv_model1_np = np.zeros([k, 2])
evaluations_cv_model2_np = np.zeros([k, 2])
evaluations_cv_model3_np = np.zeros([k, 2])

i = 0
for evaluation in evaluations_cv_model1:
    evaluations_cv_model1_np[i] = np.array(evaluation)
    i = i + 1
    
i = 0
for evaluation in evaluations_cv_model2:
    evaluations_cv_model2_np[i] = np.array(evaluation)
    i = i + 1
    
i = 0
for evaluation in evaluations_cv_model3:
    evaluations_cv_model3_np[i] = np.array(evaluation)
    i = i + 1
    
print("Model 1")
print(stats.describe(evaluations_cv_model1_np))
print("Model 2")
print(stats.describe(evaluations_cv_model2_np))
print("Model 3")
print(stats.describe(evaluations_cv_model3_np))
    
#Test evaluations  

print("\n\nTest results\n\n")

evaluations_test_model1 = evaluations_test[:10]
evaluations_test_model2 = evaluations_test[10:20]
evaluations_test_model3 = evaluations_test[20:]

evaluations_test_model1_np = np.zeros([k, 2])
evaluations_test_model2_np = np.zeros([k, 2])
evaluations_test_model3_np = np.zeros([k, 2])


i = 0
for evaluation in evaluations_test_model1:
    evaluations_test_model1_np[i] = np.array(evaluation)
    i = i + 1
    
i = 0
for evaluation in evaluations_test_model2:
    evaluations_test_model2_np[i] = np.array(evaluation)
    i = i + 1
    
i = 0
for evaluation in evaluations_test_model3:
    evaluations_test_model3_np[i] = np.array(evaluation)
    i = i + 1

print("Model 1")
print(stats.describe(evaluations_test_model1_np))
print("Model 2")
print(stats.describe(evaluations_test_model2_np))
print("Model 3")
print(stats.describe(evaluations_test_model3_np))

k-fold CV


Model 1
DescribeResult(nobs=2, minmax=(array([1.45465347, 0.64343101]), array([1.58674116, 0.67779889])), mean=array([1.52069731, 0.66061495]), variance=array([0.00872358, 0.00059058]), skewness=array([-5.08097414e-15, -9.59842596e-15]), kurtosis=array([-2., -2.]))
Model 2
DescribeResult(nobs=2, minmax=(array([0., 0.]), array([0., 0.])), mean=array([0., 0.]), variance=array([0., 0.]), skewness=array([0., 0.]), kurtosis=array([-3., -3.]))
Model 3
DescribeResult(nobs=2, minmax=(array([0., 0.]), array([0., 0.])), mean=array([0., 0.]), variance=array([0., 0.]), skewness=array([0., 0.]), kurtosis=array([-3., -3.]))


Test results


Model 1
DescribeResult(nobs=2, minmax=(array([1.42838581, 0.6477    ]), array([1.57063854, 0.6901    ])), mean=array([1.49951217, 0.6689    ]), variance=array([0.01011792, 0.00089888]), skewness=array([0., 0.]), kurtosis=array([-2., -2.]))
Model 2
DescribeResult(nobs=2, minmax=(array([0., 0.]), array([0., 0.])), mean=array([0., 0.]), variance=array([0