In [1]:
# Homework 1
#import keras
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix   
import numpy as np

In [2]:
# Load data and data standardization
def load_data():
    '''Fetch the MNIST dataset through tf.keras and flatten its two splits
    into a single 4-tuple.

    Returns
    -------
    (X_train, y_train, X_test, y_test): the four arrays, in that order, as
    handed back by tf.keras.datasets.mnist.load_data()
    '''
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split
    return (train_images, train_labels, test_images, test_labels)

def data_std(X_train, X_test):
    '''Standardize both sets with a StandardScaler fitted on the training set

    Parameters
    ----------
    X_train: original training set
    X_test: original test set

    Returns
    -------
    X_train_std: rescaled training set
    X_test_std: rescaled test set
    '''
    # The scaler is fitted on X_train only; X_test is transformed with that
    # same fitted scaler and never contributes to it.
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)

In [3]:
def create_NN(n_features = 784, n_outputs = 10): # 30 points
    '''Create and compile a feedforward neural network using keras

    The network has one hidden layer of 50 sigmoid units followed by a
    softmax output layer, and is compiled with SGD (momentum 0.9) on the
    categorical cross-entropy loss, tracking accuracy.

    Parameters
    -----------
    n_features: the number of input features/units
    n_outputs: the number of output units (one per class)

    Returns
    -------
    myNN: the compiled neural network model

    '''
    myNN = tf.keras.models.Sequential()

    # Hidden layer: the only layer that needs to be told the input width.
    myNN.add(tf.keras.layers.Dense(
        units = 50,
        input_dim = n_features,
        kernel_initializer = 'glorot_uniform',
        bias_initializer = 'zeros',
        activation = 'sigmoid'))

    # Output layer: its width follows n_outputs so the caller's requested
    # number of classes is honoured instead of a fixed 10.
    myNN.add(tf.keras.layers.Dense(
        units = n_outputs,
        kernel_initializer = 'glorot_uniform',
        bias_initializer = 'zeros',
        activation = 'softmax'))

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, decay=1e-7, momentum=.9)
    myNN.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy',metrics=["accuracy"])

    return myNN

In [4]:
def nn_params_search(nn, X_train, y_train): # 30 points
    '''Grid-search the training hyper-parameters of a classifier

    Parameters
    ----------
    nn: the estimator handed to GridSearchCV
    X_train: features
    y_train: target of the input

    Returns
    -------
    best_params_: the best_params_ attribute of the fitted grid search

    Grid searched here:
    epochs in [50, 70, 100] and batch_size in [32, 64, 128],
    using cv = 3 and n_jobs = -1.
    '''
    search_space = {
        'epochs' : [50, 70, 100],
        'batch_size' : [32, 64, 128]
    }

    searcher = GridSearchCV(estimator = nn, param_grid = search_space, cv = 3, n_jobs = -1)
    fitted_search = searcher.fit(X_train, y_train)

    return fitted_search.best_params_

In [5]:
def retrain_best_nn(best_params, X_train, y_train): # 10 points
    '''
    Retrain classifier using the best parameters

    Parameters
    ----------
    best_params: dict providing the 'batch_size' and 'epochs' to train with
    X_train: data input of the training set, 2-D (n_samples, n_features)
    y_train: one-hot target of the input, 2-D (n_samples, n_outputs)

    Returns
    ---------
    bestNN: the nn classifier trained using the best parameters

    '''
    # Size the network from the data actually passed in rather than relying
    # on create_NN's defaults, so the input and output widths always match
    # X_train and y_train.
    bestNN = create_NN(X_train.shape[1], y_train.shape[1])

    # 10% of the training data is held out by Keras for per-epoch validation.
    bestNN.fit(X_train, y_train,
               batch_size=best_params['batch_size'], epochs=best_params['epochs'],
               verbose=1,
               validation_split=0.1)

    return bestNN

In [6]:
def performance_acc(y, y_pred): # 10 points
    ''' calculate the confusion matrix and average accuracy

        Parameters
        ----------
        y: real target (class labels, array-like)
        y_pred: prediction (class labels, array-like, same number of entries)

        Returns
        -------
        cm: confusion matrix
        acc: accuracy, the fraction of entries where y and y_pred agree
    '''
    # Coerce to flat arrays first: comparing an (N,) array with an (N, 1)
    # array would broadcast to an (N, N) matrix and silently give a wrong
    # accuracy, and plain lists have no .shape at all.
    y_true = np.asarray(y).ravel()
    y_hat = np.asarray(y_pred).ravel()

    cm = confusion_matrix(y_true, y_hat)

    # Mean of the element-wise matches == correct predictions / total.
    acc = np.mean(y_true == y_hat)

    return cm, acc

In [7]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

if __name__ == '__main__':

    #Task 1. load the dataset
    (X_train, y_train, X_test, y_test) = load_data()

    # 1.1 reshape the training and test sets to N * 784. 10 points
    # -1 lets NumPy infer the flattened per-image size from the remaining axes.
    X_train_1 = np.reshape(X_train, (X_train.shape[0], -1))
    X_test_1 = np.reshape(X_test, (X_test.shape[0], -1))
    print('1. X_train: {}, X_train_1: {}'.format(X_train.shape, X_train_1.shape))

    # 1.2 transform y_train to one-hot vectors using keras.utils.to_categorical. 10 points
    y_train_onehot = tf.keras.utils.to_categorical(y_train)
    print('y_train_onehot: {}'.format(y_train_onehot.shape))

    #Task 2. create a deep feedforward neural network
    myNN = create_NN(X_train_1.shape[1], y_train_onehot.shape[1])
    myNN.summary()
    # scikit-learn compatible wrapper; this is the estimator the grid search clones.
    myNN1 = KerasClassifier(build_fn = create_NN, batch_size = 64, epochs = 50)

    #Task 3. Search best parameters, and report the performance
    best_params = nn_params_search(myNN1, X_train_1, y_train)
    print('Best parameters: ', best_params)

    bestNN = retrain_best_nn(best_params, X_train_1, y_train_onehot)
    # Sequential.predict_classes is no longer available in current tf.keras;
    # for a softmax output it is the argmax over the predicted probabilities.
    y_test_pred = np.argmax(bestNN.predict(X_test_1, verbose=0), axis=-1)
    cm, acc = performance_acc(y_test, y_test_pred)
    print('Confusion matrix:\n', cm)
    print('Accuracy =    {:.3f}%'.format(acc*100))

    #Task 4. Search best nn parameters after data standardization, and report the performance
    X_train_std, X_test_std = data_std(X_train_1, X_test_1)

    best_params =  nn_params_search(myNN1, X_train_std, y_train)
    print('Best parameters: ', best_params)

    bestNN_std = retrain_best_nn(best_params, X_train_std, y_train_onehot)
    y_test_std_pred = np.argmax(bestNN_std.predict(X_test_std, verbose=0), axis=-1)
    cm1, acc1 = performance_acc(y_test, y_test_std_pred)
    print('Confusion matrix:\n', cm1)
    print('Accuracy =    {:.3f}%'.format(acc1*100))

1. X_train: (60000, 28, 28), X_train_1: (60000, 784)
y_train_onehot: (60000, 10)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                39250     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                510       
Total params: 39,760
Trainable params: 39,760
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epo

Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Best parameters:  {'batch_size': 64, 'epochs': 100}
Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100


Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Confusion matrix:
 [[ 957    0    3    1    0    6    7    2    2    2]
 [   0 1124    3    2    0    1    2    0    3    0]
 [  13    1  975    5    4    1    6    8   18    1]
 [   3    0   12  961    1   14    0    9    9    1]
 [   1    1    4    2  928    1   11    5    3   26]
 [  11    2    2   14    5  825   14    4    9    6]
 [  13    2    6    1   10    5  917    1    3    0]
 [   2    8   20    5    2    1    0  976    1   13]
 [   9    4    7    8    5   13    8    6  912    2]
 [   7    5    1   12   18    5    1   19    4  937]]
Accuracy =    95.120%




Train on 60000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100


Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Best parameters:  {'batch_size': 32, 'epochs': 100}
Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100


Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Confusion matrix:
 [[ 965    0    2    2    1    1    5    2    2    0]
 [   0 1121    4    1    0    1    3    2    3    0]
 [   4    3  979    5    8    1    4   10   16    2]
 [   1    0    9  969    1    9    1    9    8    3]
 [   1    0    4    2  951    0   10    3    3    8]
 [   5    2    0   16    2  844    7    3   11    2]
 [   6    3    4    0    2   10  928    2    3    0]
 [   0    7   17    6    2    0    0  983    1   12]
 [   6    1    6   13    4   11    4   10  915    4]
 [   7    6    1   11   14    6    0   12    4  948]]
Accuracy =    96.030%
