# Import Libraries to Use

In [None]:
# Import Libraries
import os
import pickle

import matplotlib.pyplot as plt
import pylab

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Model, layers, models
import tensorflow.keras as keras

# Load Dataset to Use

In [None]:
# Seed both the NumPy and TensorFlow RNGs from a single shared value.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

# Hyperparameters shared by the experiments below.
learning_rate = 0.001
batch_size, epochs = 128, 1000  # Fixed
use_dropout = False  # Default

In [None]:
# Fixed; no changes needed
def load_data(file):
    """Read one pickled batch and return it as ``(data, labels)`` arrays.

    The pickle must hold a mapping with ``'data'`` and ``'labels'`` entries.

    Args:
        file: Path of the pickled batch file.

    Returns:
        Tuple ``(data, labels)``: ``data`` is a float32 array with every value
        divided by 255, ``labels`` is an int32 array.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted batch files.
    with open(file, 'rb') as handle:
        try:
            batch = pickle.load(handle)
        except UnicodeDecodeError:
            # Retry from the start of the file with a latin1 decoding
            # (python 3.x path in the original code).
            handle.seek(0)
            batch = pickle.load(handle, encoding='latin1')

    raw_pixels = batch['data']
    raw_labels = batch['labels']

    scaled = np.array(raw_pixels, dtype=np.float32) / 255
    targets = np.array(raw_labels, dtype=np.int32)
    return scaled, targets

In [None]:
# Load the training batch and the (trimmed) test batch.
x_train, y_train = load_data('data_batch_1')
x_test, y_test = load_data('test_batch_trim')

# Each sample is reshaped to (3, 32, 32) and then moved to channels-last
# (32, 32, 3), the layout the model's Input layer expects.
x_train = np.transpose(x_train.reshape((x_train.shape[0], 3, 32, 32)), (0, 2, 3, 1))
x_test = np.transpose(x_test.reshape((x_test.shape[0], 3, 32, 32)), (0, 2, 3, 1))

In [None]:
# Create folders to store models and results.
# makedirs(exist_ok=True) is safe to re-run and avoids the check-then-create race.
os.makedirs('./models', exist_ok=True)
os.makedirs('./results', exist_ok=True)

# Make the model

In [None]:
def make_model(num_ch_c1, num_ch_c2, use_dropout):
    """Build the (uncompiled) convolutional classifier used in every experiment.

    Layer stack, in order:
    Input(32, 32, 3) -> Conv2D(num_ch_c1, 9x9, relu, valid) -> MaxPool(2x2, stride 2)
    -> Conv2D(num_ch_c2, 5x5, relu, valid) -> MaxPool(2x2, stride 2) -> Flatten
    -> [Dropout(0.5)] -> Dense(300) -> [Dropout(0.5)] -> Dense(10).

    Args:
        num_ch_c1: Number of filters in the first convolution layer.
        num_ch_c2: Number of filters in the second convolution layer.
        use_dropout: When truthy, insert a Dropout(0.5) layer before each of
            the two Dense layers.

    Returns:
        A ``tf.keras.Sequential`` model whose 10 outputs are raw logits.
    """
    model = tf.keras.Sequential()
    # The Input layer is the single source of truth for the input shape; the
    # per-layer `input_shape` kwargs the original passed to later layers were
    # redundant (and wrong for the second convolution), so they are dropped.
    model.add(layers.Input(shape=(32, 32, 3)))
    model.add(layers.Conv2D(num_ch_c1, 9, activation='relu', padding='valid'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    model.add(layers.Conv2D(num_ch_c2, 5, activation='relu', padding='valid'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    model.add(layers.Flatten())
    if use_dropout:
        model.add(layers.Dropout(0.5))
    # NOTE(review): this 300-unit layer has no activation (linear) - confirm intended.
    model.add(layers.Dense(300, use_bias=True))
    if use_dropout:
        model.add(layers.Dropout(0.5))
    # No softmax here: it is folded into the loss (from_logits=True).
    model.add(layers.Dense(10, use_bias=True))
    return model

# Question 1

In [None]:
# Set channel sizes
num_ch_c1 = 50
num_ch_c2 = 60

# Create folder to store models and results.
# makedirs also creates the intermediate './results/parta' directory, which
# plain os.mkdir would not (it raises FileNotFoundError on a missing parent).
os.makedirs('./results/parta/q1', exist_ok=True)

## Train model

In [None]:
# Build the Q1 network (no dropout) with the channel sizes chosen above.
q1_model = make_model(num_ch_c1, num_ch_c2, use_dropout=False)
# The model outputs raw logits, so the loss applies the softmax itself.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

# Training
# `metrics` is passed as a list: newer Keras versions reject a bare string.
q1_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
q1_history = q1_model.fit(x_train,
                          y_train,
                          batch_size=batch_size,
                          epochs=epochs,
                          validation_data=(x_test, y_test))


# Saving history to file
print('Saving history to file')
filename = './results/parta/q1/q1_history'
with open(filename, 'wb') as file_pi:
    pickle.dump(q1_history.history, file_pi)
print('file saved at {}'.format(filename))

## Find Max and Final Validation Accuracy


In [None]:
# Find Max Accuracy and Final Accuracy
q1_val_curve = q1_history.history['val_accuracy']
max_acc = np.amax(q1_val_curve)
# Index the last recorded epoch directly instead of via the global `epochs`,
# which later cells rebind and which may not match the history length.
final_acc = q1_val_curve[-1]
print('Max Val Acc: {} \t Final Val Acc: {}'.format(max_acc, final_acc))

## Plot and Save Results

In [None]:
# Plot train/test loss and accuracy curves for Q1 side by side and save them.
model = 'Part A Q1'
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, num=1, figsize=(12, 4))

train_loss = q1_history.history['loss']
val_loss = q1_history.history['val_loss']
train_acc = q1_history.history['accuracy']
val_acc = q1_history.history['val_accuracy']

# Subplot 1: loss per epoch (epochs numbered from 1)
loss_ax.plot(range(1, len(train_loss) + 1), train_loss, label='Train')
loss_ax.plot(range(1, len(val_loss) + 1), val_loss, label='Test')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend()
# fontsize belongs to the title call; the original passed it to str.format,
# where it was silently ignored.
loss_ax.set_title('Loss for {}'.format(model), fontsize=14)

# Subplot 2: accuracy per epoch
acc_ax.plot(range(1, len(train_acc) + 1), train_acc, label='Train')
acc_ax.plot(range(1, len(val_acc) + 1), val_acc, label='Test')
acc_ax.set_ylabel('accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.legend()
acc_ax.set_title('Accuracy for {}'.format(model), fontsize=14)

fig.savefig('./results/parta/q1/q1_plot.pdf')
plt.show()

## Plot Feature Map for first 2 test images

In [None]:
# Collect the output tensors of the first four layers (C1, S1, C2, S2).
layer_outputs = [stage.output for stage in q1_model.layers[:4]]

# Wrap them in a model that returns all four feature maps in one forward pass.
activation_model = keras.models.Model(inputs=q1_model.input,
                                      outputs=layer_outputs)

# Run each of the first two test images through it as a batch of one.
activations_1 = activation_model.predict(x_test[0].reshape((-1, 32, 32, 3)))
activations_2 = activation_model.predict(x_test[1].reshape((-1, 32, 32, 3)))

In [None]:
# Plot one grid of feature maps per layer (C1, S1, C2, S2) for x_test[0].
layer_names = ['C1', 'S1', 'C2', 'S2']
grid_cols = 10
for num, (layer, activation, name) in enumerate(zip(q1_model.layers[:4],
                                                    activations_1,
                                                    layer_names)):
    fig = plt.figure(num=num, figsize=(7, 5))
    channels = activation.shape[3]
    # plt.subplot needs an integer row count; round up so a channel count that
    # is not a multiple of 10 still fits in the grid.
    rows = int(np.ceil(channels / grid_cols))
    for i in range(channels):
        plt.subplot(rows, grid_cols, i + 1)
        plt.axis('off')
        plt.imshow(activation[0, :, :, i])
    # Title and save once per figure, after every channel has been drawn
    # (the original did both on every channel iteration).
    fig.suptitle('Feature Map for {}({})'.format(layer.name, name), fontsize=14)
    fig.savefig('./results/parta/q1/feature_map_test{}_{}.pdf'.format('0', name))

In [None]:
# Plot one grid of feature maps per layer (C1, S1, C2, S2) for x_test[1].
layer_names = ['C1', 'S1', 'C2', 'S2']
grid_cols = 10
for num, (layer, activation, name) in enumerate(zip(q1_model.layers[:4],
                                                    activations_2,
                                                    layer_names)):
    fig = plt.figure(num=num, figsize=(7, 5))
    channels = activation.shape[3]
    # plt.subplot needs an integer row count; round up so a channel count that
    # is not a multiple of 10 still fits in the grid.
    rows = int(np.ceil(channels / grid_cols))
    for i in range(channels):
        plt.subplot(rows, grid_cols, i + 1)
        plt.axis('off')
        plt.imshow(activation[0, :, :, i])
    # Title and save once per figure, after every channel has been drawn
    # (the original did both on every channel iteration).
    fig.suptitle('Feature Map for {}({})'.format(layer.name, name), fontsize=14)
    fig.savefig('./results/parta/q1/feature_map_test{}_{}.pdf'.format('1', name))

In [None]:
# Save the first 2 test images.
# Each image gets its own figure; drawing both into the same implicit axes
# (as before) stacks them, so only the last one is visible in the notebook.
for i in range(2):
    x_img = x_test[i].astype(np.float32)
    fig, ax = plt.subplots()
    ax.imshow(x_img)
    ax.set_title('x_test[{}]'.format(i))
    fig.savefig('./results/parta/q1/test{}_image.pdf'.format(i))

# Question 2: Grid Search

In [None]:
# Create folder to store models and results.
# makedirs also creates the intermediate './results/parta' directory, which
# plain os.mkdir would not (it raises FileNotFoundError on a missing parent).
os.makedirs('./results/parta/q2', exist_ok=True)

# Candidate channel counts for the first and second convolution layers.
ch1_list = [10,30,50,70,90]
ch2_list = [20,40,60,80,100]
combi = []       # 'ch1_ch2' titles, in the order they are trained
histories = {}   # title -> Keras History returned by fit()

epochs = 1000  # Fixed
batch_size = 128  # Fixed
learning_rate = 0.001
use_dropout = False

In [None]:
# Train one freshly built model per (ch1, ch2) pair and persist each history.
for ch1 in ch1_list:
    for ch2 in ch2_list:
        print('Running for ch1 = {}, ch2 ={}'.format(ch1, ch2))
        title = '{}_{}'.format(ch1, ch2)
        combi.append(title)
        model = make_model(ch1, ch2, use_dropout)
        # The model outputs raw logits, so the loss applies the softmax itself.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

        # A new optimizer per model so no optimizer state is shared across runs.
        optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

        # `metrics` is passed as a list: newer Keras versions reject a bare string.
        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
        histories[title] = model.fit(x_train,
                                     y_train,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     validation_data=(x_test, y_test),
                                     verbose=0)
        print('Training complete for ch1 = {}, ch2 ={}'.format(ch1, ch2))
        # Saving history to file
        print('Saving history to file')
        filename = './results/parta/q2/gridsearch_{}'.format(title)
        with open(filename, 'wb') as file_pi:
            pickle.dump(histories[title].history, file_pi)
        print('file saved at {}'.format(filename))

In [None]:
# Find the last and max val_accuracy for all grid-search models.
# The rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and growing a frame row by row is quadratic.
grid_records = [
    {'model': run_title,
     'final_val_acc': run.history['val_accuracy'][-1],
     'max_val_acc': np.amax(run.history['val_accuracy'])}
    for run_title, run in histories.items()
]
# Explicit columns keep the schema stable even if `histories` is empty.
val_acc_df = pd.DataFrame(grid_records,
                          columns=['model', 'final_val_acc', 'max_val_acc'])

val_acc_df.to_csv('./results/parta/q2/gridsearch_results.csv', index=False)
val_acc_df

# Question 3

## Using the optimal combination of ch1 = 70 and ch2 = 40

In [None]:
# Create folder to store models and results.
# makedirs also creates the intermediate './results/parta' directory, which
# plain os.mkdir would not (it raises FileNotFoundError on a missing parent).
os.makedirs('./results/parta/q3', exist_ok=True)

# Channel counts selected from the Q2 grid search.
opt_ch1 = 70
opt_ch2 = 40

oc_histories = {}  # experiment title -> Keras History returned by fit()

epochs = 1000  # Fixed
batch_size = 128  # Fixed
learning_rate = 0.001
optimizer_ = 'SGD'  # Question 3
use_dropout = False  # Question 3(d) (see make_model)

### Part a: Adding momentum

In [None]:
# Adding momentum to SGD optimizer
momentum = 0.1
title = 'momentum'

# Use the `momentum` variable rather than repeating the literal, so changing
# the value above actually changes the optimizer.
momentum_opt = keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)
# The model outputs raw logits, so the loss applies the softmax itself.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

mom_model = make_model(opt_ch1, opt_ch2, use_dropout)

# `metrics` is passed as a list: newer Keras versions reject a bare string.
mom_model.compile(optimizer=momentum_opt, loss=loss, metrics=['accuracy'])
oc_histories[title] = mom_model.fit(x_train,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    validation_data=(x_test, y_test),
                                    verbose=2)

# Persist the per-epoch metrics for this experiment.
filename = './results/parta/q3/history_{}'.format(title)
with open(filename, 'wb') as file_pi:
    pickle.dump(oc_histories[title].history, file_pi)

### Part b: Using RMSProp algorithm for learning

In [None]:
# Using RMSProp algorithm for learning
title = 'rmsprop'

# NOTE(review): epsilon=1 is far larger than the Keras default - confirm intended.
rmsp_opt = keras.optimizers.RMSprop(learning_rate=learning_rate, epsilon=1)
# The model outputs raw logits, so the loss applies the softmax itself.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

rmsp_model = make_model(opt_ch1, opt_ch2, use_dropout)
# `metrics` is passed as a list: newer Keras versions reject a bare string.
rmsp_model.compile(optimizer=rmsp_opt, loss=loss, metrics=['accuracy'])

oc_histories[title] = rmsp_model.fit(x_train,
                                     y_train,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     validation_data=(x_test, y_test),
                                     verbose=2)


# Persist the per-epoch metrics for this experiment.
filename = './results/parta/q3/history_{}'.format(title)
with open(filename, 'wb') as file_pi:
    pickle.dump(oc_histories[title].history, file_pi)

### Part c: Using Adam Optimizer for learning

In [None]:
# Using Adam optimizer for learning
title = 'adam'
use_dropout = False

# NOTE(review): epsilon=1 is far larger than the Keras default - confirm intended.
adam_opt = keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1)
# The model outputs raw logits, so the loss applies the softmax itself.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

adam_model = make_model(opt_ch1, opt_ch2, use_dropout)
# `metrics` is passed as a list: newer Keras versions reject a bare string.
adam_model.compile(optimizer=adam_opt, loss=loss, metrics=['accuracy'])

oc_histories[title] = adam_model.fit(x_train,
                                     y_train,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     validation_data=(x_test, y_test),
                                     verbose=2)

# Persist the per-epoch metrics for this experiment.
filename = './results/parta/q3/history_{}'.format(title)
with open(filename, 'wb') as file_pi:
    pickle.dump(oc_histories[title].history, file_pi)

### Part d: Adding dropout=0.5 to the 2 fully connected layers

In [None]:
# Adding dropout=0.5 to the 2 fully connected layers
title = 'dropout'
use_dropout = True  # make_model inserts Dropout(0.5) before each Dense layer

dropout_opt = keras.optimizers.SGD(learning_rate=learning_rate)
# The model outputs raw logits, so the loss applies the softmax itself.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

dropout_model = make_model(opt_ch1, opt_ch2, use_dropout)
# `metrics` is passed as a list: newer Keras versions reject a bare string.
dropout_model.compile(optimizer=dropout_opt, loss=loss, metrics=['accuracy'])

oc_histories[title] = dropout_model.fit(x_train,
                                        y_train,
                                        batch_size=batch_size,
                                        epochs=epochs,
                                        validation_data=(x_test, y_test),
                                        verbose=2)

# Persist the per-epoch metrics for this experiment.
filename = './results/parta/q3/history_{}'.format(title)
with open(filename, 'wb') as file_pi:
    pickle.dump(oc_histories[title].history, file_pi)

## Plot results for all models in Q3

In [None]:
# Plot all figures: one loss/accuracy pair per Q3 experiment, each saved to PDF.
for i, model in enumerate(oc_histories):
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, num=i, figsize=(12, 4))

    # Subplot 1: loss per epoch (epochs numbered from 1)
    train_loss = oc_histories[model].history['loss']
    val_loss = oc_histories[model].history['val_loss']

    loss_ax.plot(range(1, len(train_loss) + 1), train_loss, label='Train')
    loss_ax.plot(range(1, len(val_loss) + 1), val_loss, label='Test')

    loss_ax.set_ylabel('loss')
    loss_ax.set_xlabel('epoch')
    loss_ax.legend()

    # fontsize belongs to the title call; the original passed it to str.format,
    # where it was silently ignored.
    loss_ax.set_title('Loss for {}'.format(model), fontsize=14)

    # Subplot 2: accuracy per epoch
    train_acc = oc_histories[model].history['accuracy']
    val_acc = oc_histories[model].history['val_accuracy']

    acc_ax.plot(range(1, len(train_acc) + 1), train_acc, label='Train')
    acc_ax.plot(range(1, len(val_acc) + 1), val_acc, label='Test')

    acc_ax.set_ylabel('accuracy')
    acc_ax.set_xlabel('epoch')
    acc_ax.legend()

    acc_ax.set_title('Accuracy for {}'.format(model), fontsize=14)
    fig.savefig('./results/parta/q3/q3_{}.pdf'.format(model))
    plt.show()

In [None]:
# Report the final and best validation accuracy of every Q3 experiment.
summary_line = 'model: {} \t final_val_acc: {} \t max_val_acc: {}'
for model, run in oc_histories.items():
    val_curve = run.history['val_accuracy']
    final_val_acc = val_curve[-1]
    max_val_acc = np.amax(val_curve)
    print(summary_line.format(model, final_val_acc, max_val_acc))

# Question 4

In [None]:
# Extend the grid-search table with the Q3 experiments and the Q1 baseline.
# Rows are collected first and concatenated once: DataFrame.append was removed
# in pandas 2.0.
q4_records = [
    {'model': run_title,
     'final_val_acc': run.history['val_accuracy'][-1],
     'max_val_acc': np.amax(run.history['val_accuracy'])}
    for run_title, run in oc_histories.items()
]

# append Q1
q4_records.append({'model': '50_60_Q1',
                   'final_val_acc': q1_history.history['val_accuracy'][-1],
                   'max_val_acc': np.amax(q1_history.history['val_accuracy'])})

# NOTE: this rebinds val_acc_df, so re-running the cell without re-running the
# grid-search table cell adds the same rows again.
val_acc_df = pd.concat([val_acc_df, pd.DataFrame(q4_records)], ignore_index=True)

val_acc_df