# Baseline Model

## Import Dependencies 

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow import keras

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import SGD # Stochastic gradient descent
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.metrics import confusion_matrix
from seaborn import heatmap # seaborn: statistical data visualization

%matplotlib inline

# Environment report: confirm the imports worked and show the library versions.
print('Importing successfully!')
print('tensorflow',tf.__version__)
print('keras',keras.__version__)
# tf.test.is_gpu_available() is deprecated; an empty device list means no GPU.
print('GPU',bool(tf.config.list_physical_devices('GPU')))

Importing successfully!
tensorflow 2.4.1
keras 2.4.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU False


## Import datasets

In [2]:
BS = 128 # batch size

def get_datagen(dataset, aug=False, shuffle=True):
    """Create a directory iterator yielding batches of 48x48 grayscale images.

    Args:
        dataset: Path of a directory containing one sub-directory per class.
        aug: When True, apply random rotation, shift, zoom and horizontal-flip
            augmentation on top of the 1/255 rescaling.
        shuffle: Whether the iterator shuffles the sample order. Defaults to
            True, which was the previously hard-coded behaviour. Pass False
            for evaluation data whose predictions must line up with the
            iterator's ``classes`` / ``filenames`` attributes.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_directory``,
        configured for categorical labels and batches of ``BS`` images.
    """
    # Augmentation settings are only added for the training split; every
    # split shares the same 1/255 pixel rescaling.
    augmentation = {}
    if aug:
        augmentation = dict(
            featurewise_center=False,
            featurewise_std_normalization=False,
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            zoom_range=0.1,
            horizontal_flip=True,
        )
    datagen = ImageDataGenerator(rescale=1./255, **augmentation)

    return datagen.flow_from_directory(
            dataset,
            target_size=(48, 48),
            color_mode='grayscale',
            shuffle=shuffle,
            class_mode='categorical',
            batch_size=BS)

In [3]:
# Root folder of the FER dataset; each split lives in its own sub-directory.
FER_DATASET_DIR = "/Users/ouyang/Documents/GitHub/ST7_FER_Projet/FER_Dataset"

# Only the training split is augmented; dev and test are just rescaled.
train_generator = get_datagen(FER_DATASET_DIR + "/train", True)
dev_generator = get_datagen(FER_DATASET_DIR + "/test-private")
test_generator = get_datagen(FER_DATASET_DIR + "/test-public")

Found 28709 images belonging to 7 classes.
Found 3589 images belonging to 7 classes.
Found 3589 images belonging to 7 classes.


## Build the basic model

In order to better understand the problem, we decided to first tackle it from scratch by building a vanilla CNN with four 3x3 same-padding ReLU convolutional layers (32, 32, 64 and 64 filters), interleaved with three 2x2 MaxPool layers, and completed with an FC layer and a softmax layer. We also added batchnorm and 20% dropout layers to address high variance and improve our accuracy.

This model consists of three stages of convolutional and max-pooling layers, followed by an FC layer of size 1024 and a softmax output layer. The convolutional layers use 32, 32, and 64 filters of size 5x5, 4x4, and 5x5, respectively. The max-pooling layers use kernels of size 3x3 and stride 2. ReLU was utilized as the activation function. To improve performance, we also added batchnorm at every layer and 30% dropout after the last FC layer. To fine-tune the model, we trained it for 300 epochs, optimizing the cross-entropy loss using stochastic gradient descent with a momentum of 0.9. The initial learning rate, batch size, and weight decay are fixed at 0.1, 128, and 0.0001, respectively. The learning rate is halved if the validation accuracy does not improve for 10 epochs. Note that the code cells below currently deviate from this description: they use four 3x3 convolutions with 2x2 max-pooling, an initial learning rate of 0.01 (`SGD_lr`) and 10 training epochs (`Epochs`).

<p><b> The architecture of neural network </b></p>
<img src="Architecture_NN.jpg" alt=" The architecture of neural network" width="800">

In [4]:
# Hyperparameters read by the model-building and model-saving cells.
Dropout_rate = 0.3   # dropout rate applied after the fully connected layer
SGD_lr = 1e-2        # learning rate of the SGD optimiser
SGD_decay = 1e-4     # value passed as the `decay` argument of SGD

In [5]:
# Baseline CNN: input normalisation, four 3x3 same-padding conv stages
# (32, 32, 64, 64 filters), a 1024-unit dense layer and a 7-way softmax.
model = keras.Sequential()

# The input shape is declared once, on the first layer of the stack.
model.add(BatchNormalization(input_shape=(48,48,1)))
model.add(Conv2D(32, (3, 3), activation='relu',padding='same',name="conv1"))
model.add(BatchNormalization())
# The first stage has no pooling, so the 48x48 resolution is kept here.
model.add(Dropout(0.2))

model.add(Conv2D(32, (3, 3), activation='relu',padding='same',name="conv2"))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),name="maxpool2"))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), activation='relu',padding='same',name="conv3"))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),name="maxpool3"))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), activation='relu',padding='same',name="conv4"))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),name="maxpool4"))
model.add(Dropout(0.2))

# Classifier head.
model.add(Flatten())
model.add(Dense(1024, activation='relu',name='fc1'))
model.add(Dropout(Dropout_rate))
model.add(BatchNormalization())

# One output unit per emotion class.
model.add(Dense(7, activation='softmax',name='fcsoftmax'))

#TODO: weight decay of 0.0001...initial learning rate is set to 0.01 and reduced by a factor of 2 at every 25 epoch
# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): in tf.keras the `decay` keyword appears to be a time-based
# learning-rate decay, not the L2 weight decay the TODO asks for - confirm.
sgd = SGD(learning_rate=SGD_lr, momentum=0.9, decay=SGD_decay, nesterov=True)
model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])

## Train the model

For more information about **keras.Model**, please refer to the [official API documentation](https://www.tensorflow.org/api_docs/python/tf/keras/Model).

In [6]:
Epochs = 10

# Halve the learning rate when validation accuracy stops improving.
# NOTE(review): with patience=10 and Epochs=10 this callback cannot trigger
# during a single run - confirm the intended epoch count.
rlrop = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',factor=0.5, patience=10, min_lr=0.00001,mode='max')
# Save the model whenever validation accuracy reaches a new best. The path is
# relative to the working directory: the previous '/Baseline-weights-best.hdf5'
# pointed at the filesystem root.
cp_filepath='Baseline-weights-best.hdf5'
checkpoint = ModelCheckpoint(cp_filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# Model.fit accepts generators directly (fit_generator is deprecated). The
# `shuffle` argument is ignored for generator input, so ordering is left to
# the generators themselves.
hist = model.fit(
    train_generator,
    validation_data=dev_generator,
    epochs=Epochs,
    callbacks=[rlrop, checkpoint],
)



Epoch 1/10
  1/225 [..............................] - ETA: 5:37 - loss: 2.6483 - accuracy: 0.1641

KeyboardInterrupt: 

In [None]:
# Show which metrics were recorded during training.
recorded_metrics = hist.history.keys()
print('the keys of the trained model:', '\n', recorded_metrics)

## Evaluate the model

    evaluate_generator(
        generator, steps=None, callbacks=None, max_queue_size=10, workers=1,
        use_multiprocessing=False, verbose=0
    )

In [None]:
print('\n# Evaluate on dev data')
# Without `steps`, Keras runs over every batch of the generator. The previous
# `3509 // BS` mistyped the 3589-image set size and dropped the final partial
# batch. Model.evaluate replaces the deprecated evaluate_generator.
results_dev = model.evaluate(dev_generator, verbose=0)
print('dev loss, dev acc:', results_dev)

In [None]:
print('\n# Evaluate on test data')
# Without `steps`, Keras runs over every batch of the generator. The previous
# `3509 // BS` mistyped the 3589-image set size and dropped the final partial
# batch. Model.evaluate replaces the deprecated evaluate_generator.
results_test = model.evaluate(test_generator, verbose=0)
print('test loss, test acc:', results_test)

In [None]:
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'dev'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'dev'], loc='upper left')
plt.show()

### Save the model in folder *trained_models*

In [None]:
# Encode the main hyperparameters and the test accuracy in the file name.
epoch_str = '-EPOCHS_' + str(Epochs)
dropout_str = '-DROPOUT_' + str(Dropout_rate)
test_acc = '-test_acc_%.3f' % results_test[1]

# Create the target folder first so saving does not fail when it is missing.
os.makedirs('trained_models', exist_ok=True)
model.save('trained_models/' + 'Baseline' + epoch_str + dropout_str + test_acc + '.h5')

In [None]:
emotions = {0:'Angry', 1:'Disgust', 2:'Fear', 3:'Happy', 4:'Sad', 5:'Surprise', 6:'Neutral'}

# Take the axis labels from the generator's own class indices. Folders named
# with digits are translated through `emotions`; any other folder name is used
# as is.
# NOTE(review): the hard-coded order in `emotions` only matches the generator
# when the class folders are named 0..6 - confirm against the dataset layout.
folder_of_index = {index: folder for folder, index in dev_generator.class_indices.items()}
class_names = [
    emotions.get(int(folder), folder) if folder.isdigit() else folder
    for folder in (folder_of_index[index] for index in sorted(folder_of_index))
]

# dev_generator shuffles its samples, so `dev_generator.classes` (directory
# order) does not line up with `model.predict(dev_generator)`. Collect labels
# and predictions from the same batches instead.
true_batches, pred_batches = [], []
for batch_index in range(len(dev_generator)):
    images, onehot_labels = dev_generator[batch_index]
    true_batches.append(onehot_labels.argmax(axis=1))
    pred_batches.append(model.predict(images, verbose=0).argmax(axis=1))
y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

cmat_df_test=pd.DataFrame(
  confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))), normalize='true').round(2),
  index=class_names,
  columns=class_names
  )

plt.figure(figsize=(5,5))
heatmap(cmat_df_test,annot=True,cmap=plt.cm.Blues)
plt.title('Confusion Matrix on Private Test Set')
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay out after the title and labels exist so they are taken into account.
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import accuracy_score
# Configure image data augmentation for test-time augmentation: horizontal
# flips only, with no rescaling.
# NOTE(review): tta_evaluate_model does not read this global, so it only
# matters when tta_prediction is called directly.
datagen = ImageDataGenerator(horizontal_flip=True)

# make a prediction using test-time augmentation
def tta_prediction(datagen, model, image, n_examples):
    """Predict the class of a single image using test-time augmentation.

    Args:
        datagen: Object whose ``flow(samples, batch_size=...)`` method returns
            an iterator of augmented image batches (e.g. an
            ``ImageDataGenerator``).
        model: Model whose ``predict`` method returns one row of class scores
            per input image.
        image: A single image array, without the batch dimension.
        n_examples: Number of augmented copies to predict on.

    Returns:
        Index of the class with the highest score summed over all augmented
        copies.
    """
    # Add the batch dimension: the iterator expects a dataset of images.
    samples = np.expand_dims(image, 0)
    # The dataset holds one image, so each step yields one augmented copy.
    it = datagen.flow(samples, batch_size=n_examples)
    # Model.predict accepts iterators directly (predict_generator is deprecated).
    yhats = model.predict(it, steps=n_examples, verbose=0)
    # Sum the per-class scores over the augmented copies, then pick the best.
    summed = np.sum(yhats, axis=0)
    return np.argmax(summed)
 
 # evaluate a model on a dataset using test-time augmentation
def tta_evaluate_model(model, testX, testY, datagen=None, n_examples_per_image=7):
    """Compute the accuracy of ``model`` on a dataset with test-time augmentation.

    Args:
        model: Model passed on to ``tta_prediction``.
        testX: Sequence or array of images, iterated along its first axis.
        testY: Array of per-class label rows (e.g. one-hot), one row per image
            in ``testX``.
        datagen: Augmentation generator to use. Defaults to an
            ``ImageDataGenerator`` with horizontal flips only, as before.
        n_examples_per_image: Number of augmented copies generated for each
            image. Defaults to 7, the previously hard-coded value.

    Returns:
        The accuracy of the augmented predictions against the labels in
        ``testY``.
    """
    if datagen is None:
        # NOTE(review): this default applies no rescaling, so testX is
        # presumably already scaled the way the model was trained - confirm.
        datagen = ImageDataGenerator(horizontal_flip=True)
    # One augmented prediction per test image.
    yhats = [
        tta_prediction(datagen, model, image, n_examples_per_image)
        for image in testX
    ]
    # Turn the label rows into class indices before comparing.
    testY_labels = np.argmax(testY, axis=1)
    return accuracy_score(testY_labels, yhats)

In [None]:
print('\n# Evaluate on test data')
# Load the public test split into memory once. Each batch is fetched a single
# time, so images and labels stay paired even though the generator shuffles.
# The images come out of test_generator already rescaled by 1/255.
test_batches = [test_generator[i] for i in range(len(test_generator))]
X_test = np.concatenate([images for images, _ in test_batches])
Y_test = np.concatenate([labels for _, labels in test_batches])

# The call was previously commented out, leaving TTA_results_test undefined.
TTA_results_test = tta_evaluate_model(model, X_test, Y_test)
print('test loss, test acc:', results_test)
print('TTA test acc:', TTA_results_test)