# Problem 2 - Deep Learning - CNN

## (AUTHOR : JEREMY SHIH)

In [1]:
import idx2numpy

def load_data():
    '''
    Read the MNIST idx files from the local 'MNIST' folder and prepare them for a CNN.

    Input:
        - None
    Output:
        - train_data   : training images with a trailing channel axis, (N, 28, 28, 1)
        - train_labels : one-hot encoded training labels
        - test_data    : test images with a trailing channel axis, (N, 28, 28, 1)
        - test_labels  : one-hot encoded test labels

    Note: to_categorical must already be imported when this function is called.
    '''
    # files are read in this order: train images, train labels, test images, test labels
    idx_files = (
        'MNIST/train-images-idx3-ubyte',
        'MNIST/train-labels-idx1-ubyte',  # labels come back 1-D, e.g. (60000,)
        'MNIST/t10k-images-idx3-ubyte',
        'MNIST/t10k-labels-idx1-ubyte',   # labels come back 1-D, e.g. (10000,)
    )
    raw_train_x, raw_train_y, raw_test_x, raw_test_y = [
        idx2numpy.convert_from_file(path) for path in idx_files
    ]

    def add_channel_axis(images):
        # grayscale images -> one explicit channel: (N, 28, 28) becomes (N, 28, 28, 1)
        return images.reshape((images.shape[0], 28, 28, 1))

    images_train = add_channel_axis(raw_train_x)
    images_test = add_channel_axis(raw_test_x)

    # integer class ids -> one-hot rows
    onehot_train = to_categorical(raw_train_y)
    onehot_test = to_categorical(raw_test_y)

    return images_train, onehot_train, images_test, onehot_test


In [2]:
def prep_pixels(train, test):
    '''
    Convert both image arrays to float32 and divide every value by 255.0.

    Input:
        - train : array of training images (assumed to hold 0-255 pixel values)
        - test  : array of test images (assumed to hold 0-255 pixel values)
    Output:
        - train_norm : float32 version of train, scaled by 1/255
        - test_norm  : float32 version of test, scaled by 1/255

    Note: Normalization puts all inputs on a comparable scale, which tends to make
    training faster and more stable. For example, with age in 0-120 and income in
    10k-100k, the un-normalized income would initially dominate purely because of
    its larger magnitude.
    '''
    def to_unit_range(pixels):
        # cast first so the division is carried out in float32
        return pixels.astype('float32') / 255.0

    train_norm, test_norm = to_unit_range(train), to_unit_range(test)
    return train_norm, test_norm

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dropout
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.utils import plot_model

def define_model():
    '''
    Define and compile a small CNN (sequential model) for 28x28x1 images and 10 classes.

    Input:
        - None
    Output:
        - compiled CNN model

    Conv2D:
        - filters = dimensionality of the output (number of feature maps)
        - kernel size = height and width of the 2D convolution window
        - activation function
        - kernel_initializer = initializer for the kernel weights matrix
        - input shape -> given when this is the first layer in a model

    MaxPooling2D
        - pool_size = window size over which to take the maximum

    Flatten -> flattens the input, does not affect the batch size

    Dropout -> randomly zeroes a fraction of its inputs during training to limit overfitting

    Dense -> implements 'output = activation(dot(input, kernel) + bias)'
        - units = dimensionality of the output space
        - activation = activation function to use
        - kernel_initializer = initializer for the kernel weights matrix
            -> 'he_uniform' -> draws samples from a uniform distribution

    SGD (Stochastic Gradient Descent Optimizer)
        - learning_rate
        - momentum -> accelerates gradient descent in the relevant direction and dampens oscillations

    Compile
        - optimizer -> optimizer instance (or name)
        - loss -> objective function
        - metrics -> list of metrics displayed during fit() and logged to the History object returned by fit()
            -> evaluated by the model during training and testing

    Note: A sequential model is appropriate for a plain stack of layers where each layer
    has exactly one input tensor and one output tensor.
    '''
    model = Sequential()
    # FRONT END - convolutional and pooling layers
    model.add(Conv2D(32, (3,3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1))) # filter size of (3,3) and number of filters = 32
    model.add(MaxPooling2D((2,2)))
    model.add(Flatten()) # flatten to provide features to the classifier -> convert to vector
    model.add(Dropout(0.5)) # prevent overfitting

    # BACK END
    # add dense layer to interpret features
    model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax')) # require 10 nodes to predict 10 classes

    # OPTIMIZER USED
    # 'learning_rate' replaces the deprecated 'lr' keyword, which newer Keras releases no longer accept
    opt = SGD(learning_rate=0.01, momentum=0.9) # Stochastic gradient descent optimizer: learning rate=0.01 and momentum=0.9

    # COMPILE MODEL
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy']) # categorical cross-entropy loss function will be optimized -> suitable for multi-class classification

    return model

In [5]:
# Load data
train_data, train_labels, test_data, test_labels = load_data()

# Prep Pixels (normalize)
train_data, test_data = prep_pixels(train_data, test_data)

# Define Model
model = define_model()

# Print model summary and save model_plot image.
# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


# TRAIN THE CNN ON THE MNIST TRAINING SET
# - input data / target data = training images and their one-hot labels
# - batch size = number of samples per gradient update
# - epochs = number of iterations over the entire x and y data provided
# - verbose = 0 = silent
# - validation data = data on which to evaluate the loss and any model metrics at the
#   end of each epoch (the held-out test set here; it is not used to update weights)
#
# The model is fitted exactly once. Calling fit() a second time would continue training
# the same model for another 10 epochs, so the training-set and test-set accuracies
# below would then describe two different model states.
history = model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_data=(test_data, test_labels), verbose=0)


# EVALUATE THE TRAINED CNN ON THE TRAINING SET
# arguments: input data, target data, verbose = 0 = silent
# loss value = how well or poorly the model fits the given data; it is not a percentage,
# and the lower the loss, the better the model.
loss_value, accuracy = model.evaluate(train_data, train_labels, verbose=0)
print("Classification accuracy for training set is: ", accuracy)


# EVALUATE THE SAME TRAINED CNN ON THE TEST SET
# arguments: input data, target data, verbose = 0 = silent
loss_value, accuracy = model.evaluate(test_data, test_labels, verbose=0)
print("Classification accuracy for testing set is: ", accuracy)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 5408)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5408)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               540900    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
Total params: 542,230
Trainable params: 542,230
Non-trainable params: 0
________________________________________________