In [None]:
from tensorflow.keras.datasets import mnist
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import random

# Problem 3) [4 pts] Application of Keras to build, compile, and train a neural network as a three-class classifier for the MNIST dataset (0 vs. 1 vs. 2):

### a. Use the `mnist` module in `keras.datasets` to load the MNIST dataset and split it into training and testing sets. Then randomly select 20% of the training images, along with their corresponding labels, to be the validation data.

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# NOTE(review): the problem statement asks for a 0-vs-1-vs-2 classifier, but all
# digit classes returned by load_data() are kept here -- confirm whether the
# sets should be filtered to those three labels.

# Randomly select 20% of the training images (with their labels) for validation.
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All, and the selected samples are removed from the training set
# so that validation scores are measured on images the model was not fitted on.
VAL_PERCENT = 20
SPLIT_SEED = 42
max_range = len(x_train)
random_list = random.Random(SPLIT_SEED).sample(range(max_range), max_range * VAL_PERCENT // 100)

# Boolean mask marking which of the original training rows were held out.
held_out = np.zeros(max_range, dtype=bool)
held_out[random_list] = True

# Build the validation arrays first: they index into the still-complete training set.
x_val, y_val = x_train[random_list], y_train[random_list]
x_train, y_train = x_train[~held_out], y_train[~held_out]

### b. Feature extraction: average the pixel values in each of the four quadrants of each image to generate a feature vector of 4 values per image.

In [None]:
def feat_extract(images):
  """Average the pixel values of the four quadrants of every image.

  The images are split at the midpoint of axis 1 and axis 2. Each quadrant's
  sum is divided by that quadrant's own pixel count, so the result is a true
  average even when a dimension is odd and the quadrants differ in size.

  Args:
    images: array-like of shape (n_images, dim1, dim2).

  Returns:
    Float array of shape (n_images, 4). Column order is:
    0 = first half of axis 1, first half of axis 2;
    1 = first half of axis 1, second half of axis 2;
    2 = second half of axis 1, first half of axis 2;
    3 = second half of axis 1, second half of axis 2.
  """
  images = np.asarray(images)
  mid1 = images.shape[1] // 2
  mid2 = images.shape[2] // 2

  quadrants = (
      images[:, :mid1, :mid2],  # quadrant 0
      images[:, :mid1, mid2:],  # quadrant 1
      images[:, mid1:, :mid2],  # quadrant 2
      images[:, mid1:, mid2:],  # quadrant 3
  )

  features = np.zeros((images.shape[0], 4))
  for col, quadrant in enumerate(quadrants):
    # max(..., 1) keeps an empty quadrant (a dimension of size 1) at 0 instead of dividing by zero.
    n_pixels = max(quadrant.shape[1] * quadrant.shape[2], 1)
    features[:, col] = quadrant.sum(axis=(1, 2)) / n_pixels
  return features

In [None]:
# Quadrant-average features (four values per image) for the training,
# validation and testing images, computed in that order.
feature_train, feature_val, feature_test = (
    feat_extract(image_set) for image_set in (x_train, x_val, x_test)
)

### c. Convert the label vectors for all the sets to binary class matrices using the Keras `to_categorical()` function.

In [None]:
# Width of the one-hot label rows; it matches the 10-unit softmax output layer
# used by every model defined below.
num_classes = 10

# One-hot encode the integer labels of each split (train, test, validation).
y_train_cat, y_test_cat, y_val_cat = (
    keras.utils.to_categorical(label_vector, num_classes)
    for label_vector in (y_train, y_test, y_val)
)

In [None]:
def plot_curve(accuracy_train, loss_train):
  """Plot training accuracy and training loss side by side, then show the figure.

  Both panels are drawn with plt.subplot, i.e. into the current matplotlib
  figure, so a caller may size that figure beforehand with plt.figure(...).

  Args:
    accuracy_train: 1-D array-like of accuracy values, plotted against its index.
    loss_train: 1-D array-like of loss values; its length sets the x-axis range.
  """
  accuracy_train = np.asarray(accuracy_train)
  loss_train = np.asarray(loss_train)
  epochs = np.arange(loss_train.shape[0])

  acc_ax = plt.subplot(1, 2, 1)
  acc_ax.plot(epochs, accuracy_train)
  acc_ax.set_xlabel('Epoch#')
  acc_ax.set_ylabel('Accuracy')
  acc_ax.set_title('Training Accuracy')

  loss_ax = plt.subplot(1, 2, 2)
  loss_ax.plot(epochs, loss_train)
  loss_ax.set_xlabel('Epoch#')
  # Every model in this notebook is compiled with loss='categorical_crossentropy',
  # so the axis is labelled accordingly (it previously said "Binary").
  loss_ax.set_ylabel('Categorical crossentropy loss')
  loss_ax.set_title('Training loss')
  plt.show()

In [None]:
def model_report(title, model_in, train_features=None, train_labels=None,
                 val_features=None, val_labels=None):
    """Train a compiled model, print its train/validation/test scores and plot the training curves.

    The model is fitted for 50 epochs with a batch size of 16, then evaluated
    on the training, validation and test sets. Each score is read as
    [loss, accuracy], which is what evaluate() returns for a model compiled
    with metrics=['accuracy'].

    Args:
        title: heading printed above the report.
        model_in: compiled Keras model; it is trained in place.
        train_features, train_labels: training inputs and one-hot targets.
            Default to the notebook-level feature_train / y_train_cat.
        val_features, val_labels: validation inputs and one-hot targets.
            Default to the notebook-level feature_val / y_val_cat.

    The test set is always the notebook-level feature_test / y_test_cat.
    """
    # Fall back to the notebook globals so both the two-argument call and the
    # six-argument call used by later cells are accepted.
    if train_features is None:
        train_features = feature_train
    if train_labels is None:
        train_labels = y_train_cat
    if val_features is None:
        val_features = feature_val
    if val_labels is None:
        val_labels = y_val_cat

    print('----------------------------------------------------')
    print(title)
    print('----------------------------------------------------')

    history = model_in.fit(train_features, train_labels, batch_size=16, epochs=50, verbose=0)

    train_score = model_in.evaluate(train_features, train_labels)
    val_score = model_in.evaluate(val_features, val_labels)
    test_score = model_in.evaluate(feature_test, y_test_cat)

    print('Training Loss: ', train_score[0])
    print('Training Accuracy: ', train_score[1])

    print('Validation Loss: ', val_score[0])
    print('Validation Accuracy: ', val_score[1])

    print('Test Loss: ', test_score[0])
    print('Test Accuracy: ', test_score[1])

    # plot_curve draws into the current figure and calls plt.show() itself,
    # so the figure only needs to be created and sized here.
    plt.figure(figsize=[9,5])
    acc_curve = np.array(history.history['accuracy'])
    loss_curve = np.array(history.history['loss'])
    plot_curve(acc_curve, loss_curve)
    print('----------------------------------------------------')

### d. Build, compile, train, and then evaluate:
i. Build a neural network with 1 layer that contains 10 nodes using the Keras library. <br>
ii. Compile the network. Make sure to select a correct loss function for this classification problem. Use stochastic gradient descent learning (SGD, learning rate of 0.0001). Explain your selection of the loss function.<br>
iii. Train the network for 50 epochs and a batch size of 16.<br>
iv. Plot the training loss (i.e., the learning curve) for all the epochs.<br>
v. Use the evaluate() Keras function to find the training and validation loss and accuracy.

In [None]:
# Model 1: the four quadrant features feed a single Dense layer of 10 softmax units.
model1 = keras.Sequential([
    keras.layers.Dense(units=10, activation='softmax', input_dim=4),
])
model1.summary()

# Categorical cross-entropy pairs with the softmax output and the one-hot
# label matrices produced by to_categorical().
model1.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model_report('Model 1 - 1 Layer, 10 nodes', model1)

### e. Repeat step (d) for each of the following networks:

In [None]:
# Model 2: one hidden layer of 50 ReLU units followed by the 10-unit softmax output layer.
model2 = keras.Sequential()
model2.add(keras.layers.Dense(input_dim=4, units=50, activation='relu'))
model2.add(keras.layers.Dense(units=10, activation='softmax'))
model2.summary()

# Categorical cross-entropy pairs with the softmax output and one-hot labels.
model2.compile(loss='categorical_crossentropy',
               optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
               metrics=['accuracy'])

# model_report trains and evaluates on the notebook-level feature/label arrays,
# so only the title and the model are passed.
model_report('Model 2 - 1 Layer, 50 nodes, and Output layer', model2)

In [None]:
# Model 3: one hidden layer of 100 ReLU units followed by the 10-unit softmax output layer.
model3 = keras.Sequential()
model3.add(keras.layers.Dense(input_dim=4, units=100, activation='relu'))
model3.add(keras.layers.Dense(units=10, activation='softmax'))
model3.summary()

# Categorical cross-entropy pairs with the softmax output and one-hot labels.
model3.compile(loss='categorical_crossentropy',
               optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
               metrics=['accuracy'])

# model_report trains and evaluates on the notebook-level feature/label arrays,
# so only the title and the model are passed.
model_report('Model 3 - 1 Layer, 100 nodes, and Output layer', model3)

In [None]:
# Model 4: hidden layers of 100 and 10 ReLU units followed by the 10-unit softmax output layer.
# NOTE(review): this cell previously built exactly the same network as Model 3
# (100 -> 10), although its title lists two layers ("100 nodes, 10 nodes") in
# parallel with Model 5 ("100 nodes, 50 nodes, and Output layer"). The 10-unit
# hidden layer is added on that reading -- confirm against the assignment table.
model4 = keras.Sequential()
model4.add(keras.layers.Dense(input_dim=4, units=100, activation='relu'))
model4.add(keras.layers.Dense(units=10, activation='relu'))
model4.add(keras.layers.Dense(units=10, activation='softmax'))
model4.summary()

# Categorical cross-entropy pairs with the softmax output and one-hot labels.
model4.compile(loss='categorical_crossentropy',
               optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
               metrics=['accuracy'])

# model_report trains and evaluates on the notebook-level feature/label arrays,
# so only the title and the model are passed.
model_report('Model 4 : 2 Layers - 100 nodes, 10 nodes', model4)

In [None]:
# Model 5: hidden layers of 100 and 50 ReLU units followed by the 10-unit softmax output layer.
model5 = keras.Sequential()
model5.add(keras.layers.Dense(input_dim=4, units=100, activation='relu'))
model5.add(keras.layers.Dense(units=50, activation='relu'))
model5.add(keras.layers.Dense(units=10, activation='softmax'))
model5.summary()

# Categorical cross-entropy pairs with the softmax output and one-hot labels.
model5.compile(loss='categorical_crossentropy',
               optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
               metrics=['accuracy'])

# model_report trains and evaluates on the notebook-level feature/label arrays,
# so only the title and the model are passed.
model_report('Model 5 : 2 Layers - 100 nodes, 50 nodes, and Output layer', model5)

### f. What behavior do you observe in the training loss and the validation loss when you increase the number of layers and nodes in the previous table? Which model is more suitable for this problem? Explain.

### g. Evaluate the selected model in part (e) on the testing set and report the testing loss and accuracy.