# Imports

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import tensorflow as tf
import tensorflow.keras.backend as K
from google.colab import drive
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import KFold, StratifiedKFold
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directories

In [None]:
# Mount Google Drive so the dataset folders below are reachable from Colab.
drive.mount('/content/drive')

# Project root on Drive; the commented-out line is the alternative "HC" layout.
#ROOT_DIR = "/content/drive/MyDrive/Spring 2022/DTSC 870/Code" # HC Directory
ROOT_DIR = "/content/drive/MyDrive/DTSC 870/Code" #MT Directory

# Dataset 02 (02_FER): root folder plus its train/test sub-folders.
DATASET_02 = f"{ROOT_DIR}/datasets/02_FER"
DATASET_02_ROOT = DATASET_02  # same path under the *_ROOT name
DATASET_02_TRAIN = f"{DATASET_02}/train"
DATASET_02_TEST = f"{DATASET_02}/test"

Mounted at /content/drive


# Image Size

In [None]:
import PIL.Image  # a bare `import PIL` does not load the Image submodule

# Open one training sample to check the native resolution of the dataset images.
# The context manager closes the underlying file once the size has been read.
with PIL.Image.open(DATASET_02_TRAIN + "/surprise/Training_99984132.jpg") as image:
  width, height = image.size

print(width, height)

48 48


# General Loading


In [None]:
# Plain generator: no augmentation and no rescaling is configured here.
datagen = ImageDataGenerator()
image_size = (224, 224)

# Test iterator: shuffle is disabled so the iteration order stays fixed and
# predictions can be compared row-by-row with test_set.classes.
test_set = datagen.flow_from_directory(
    DATASET_02_TEST,
    target_size=image_size,
    batch_size=256,
    shuffle=False,
)

# Training iterator: same resize/batch settings, library-default shuffling.
train_set = datagen.flow_from_directory(
    DATASET_02_TRAIN,
    target_size=image_size,
    batch_size=256,
)



Found 7178 images belonging to 7 classes.
Found 28709 images belonging to 7 classes.


# Model Creation

In [None]:
def create_model_V1(num_classes=7, input_shape=(48, 48, 3)):
  """Build a small three-convolution CNN image classifier.

  The previous head was a single sigmoid unit, which cannot represent the
  seven classes the data generators in this notebook report; the head is now
  a softmax with one unit per class.

  Args:
    num_classes: Number of output classes. Defaults to 7, the number of class
      folders found in the dataset loaded above.
    input_shape: (height, width, channels) of the input images.

  Returns:
    An uncompiled Keras Sequential model ending in a `num_classes`-way softmax.
  """
  model = models.Sequential()

  # Two conv -> pool -> dropout stages, then a final convolution.
  model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Dropout(0.4))
  model.add(layers.Conv2D(64, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Dropout(0.4))
  model.add(layers.Conv2D(64, (3, 3), activation='relu'))

  # Classifier head: one probability per class.
  model.add(layers.Flatten())
  model.add(layers.Dense(64, activation='relu'))
  model.add(layers.Dense(num_classes, activation='softmax'))
  return model


# Instantiate the V1 CNN and print its layer-by-layer summary.
model_1 = create_model_V1()
model_1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 46, 46, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 23, 23, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 23, 23, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 21, 21, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 10, 10, 64)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 10, 10, 64)        0

# Glob for loading data

In [None]:
import glob

#borrowed from svm code
def get_data_df(dir):
  """Index image files into a DataFrame of relative filenames and labels.

  Adapted from:
  https://www.kaggle.com/namgalielei/simple-load-images-and-count-number-of-each-class

  Args:
    dir: Glob pattern matching the image files, e.g. "<root>/*/*.jpg". The
      name of each file's parent folder is used as its label. (The parameter
      name shadows the `dir` builtin; it is kept so existing callers work.)

  Returns:
    DataFrame with columns 'filename' ("<label>/<file name>") and 'label',
    one row per matched file, ordered by path.
  """
  records = []
  # glob returns matches in arbitrary order; sorting makes the row order, and
  # therefore any seeded split computed from this frame, reproducible.
  for path in sorted(glob.glob(dir)):
    parts = path.split("/")
    label, file_name = parts[-2], parts[-1]
    records.append((label + "/" + file_name, label))

  return pd.DataFrame(records, columns=['filename', 'label'])

In [None]:
# Sanity check: index every training image and display the resulting frame.
test_train = get_data_df(DATASET_02_TRAIN+"/*/*.jpg")
test_train

Unnamed: 0,filename,label
0,angry/Training_78323011.jpg,angry
1,angry/Training_78379622.jpg,angry
2,angry/Training_78392730.jpg,angry
3,angry/Training_78448765.jpg,angry
4,angry/Training_78464977.jpg,angry
...,...,...
28704,surprise/Training_14497906.jpg,surprise
28705,surprise/Training_14647859.jpg,surprise
28706,surprise/Training_14655080.jpg,surprise
28707,surprise/Training_14714646.jpg,surprise


In [None]:
from sklearn.model_selection import KFold, StratifiedKFold

# Module-level copies of the cross-validation inputs. NOTE(review): model_test
# below rebuilds its own local train_data / Y / skf / idg, so it does not read
# these globals.
train_data = get_data_df(DATASET_02_TRAIN+"/*/*.jpg")
Y = train_data[['label']]

#Y = final_train[['label']]
#n = len(final_train)   

# Stratified 5-fold splitter with a fixed seed.
skf = StratifiedKFold(n_splits = 5, random_state = 7, shuffle = True) 

# No preprocessing is configured other than rescaling pixel values by 1/255.
idg = ImageDataGenerator(rescale=1./255)

#model name
def get_model_name(k):
    """Return the checkpoint file name for fold `k`, e.g. 'model_3.h5'."""
    return f"model_{k!s}.h5"
def model_test(fold_count, epoch, model_method, image_size):
  """Train and score `model_method` with stratified k-fold cross-validation.

  For each fold a fresh model is built, trained on the fold's training split,
  checkpointed at its best validation accuracy, plotted, and finally re-scored
  on the fold's validation split using the best checkpoint.

  Args:
    fold_count: Number of stratified folds.
    epoch: Number of training epochs per fold.
    model_method: Zero-argument callable returning an uncompiled Keras model
      with one softmax output per class. Its `__name__` is used to build the
      checkpoint directory and file names.
    image_size: (height, width) the generators resize images to; it has to
      match the input size of the model returned by `model_method`.

  Returns:
    Dict with two lists, 'val_accuracy' and 'val_loss', holding one entry per
    fold (best checkpoint evaluated on that fold's validation split).

  Raises:
    ValueError: If the model's output width differs from the number of classes.
  """
  skf = StratifiedKFold(n_splits=fold_count, random_state=7, shuffle=True)
  train_data = get_data_df(DATASET_02_TRAIN+"/*/*.jpg")

  # No rescaling or augmentation is applied by the generator.
  idg = ImageDataGenerator()
  labels = train_data['label']
  # Fixed class list so every fold's generators share the same class indices.
  class_names = sorted(labels.unique())
  n = len(train_data)
  VALIDATION_ACCURACY = []
  VALIDATION_LOSS = []

  # Checkpoints go to a per-model, per-epoch-count folder; create it up front
  # so saving the first checkpoint cannot fail on a missing directory.
  save_dir = ROOT_DIR+'/datasets/02_FER/Saved_CNN_Models/Testing/'+model_method.__name__+'/'+str(epoch)+'epochs/'
  os.makedirs(save_dir, exist_ok=True)
  img_dir = DATASET_02_TRAIN

  # The split depends only on the labels, so a dummy X of the right length is enough.
  for fold_var, (train_index, val_index) in enumerate(skf.split(np.zeros(n), labels), start=1):
    training_data = train_data.iloc[train_index]
    validation_data = train_data.iloc[val_index]

    train_data_generator = idg.flow_from_dataframe(training_data, directory=img_dir,
                      target_size=image_size, batch_size=256,
                      x_col="filename", y_col="label",
                      classes=class_names, shuffle=True)
    # Order is irrelevant for aggregate validation metrics, so no shuffling.
    valid_data_generator = idg.flow_from_dataframe(validation_data, directory=img_dir,
                      target_size=image_size, batch_size=256,
                      x_col="filename", y_col="label",
                      classes=class_names, shuffle=False)

    # Fresh model per fold so no weights leak between folds.
    model = model_method()
    if model.output_shape[-1] != len(class_names):
      raise ValueError(
          f"{model_method.__name__} outputs {model.output_shape[-1]} unit(s) "
          f"but the data has {len(class_names)} classes")

    # The generators yield one-hot labels, so the multi-class loss is required;
    # 'accuracy' is logged as 'accuracy' / 'val_accuracy'.
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy'])

    # Single source of truth for the checkpoint path (used for save and reload).
    checkpoint_path = save_dir+model_method.__name__+"_"+get_model_name(fold_var)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                monitor='val_accuracy', verbose=1,
                save_best_only=True, mode='max')

    history = model.fit(train_data_generator,
            epochs=epoch,
            callbacks=[checkpoint],
            validation_data=valid_data_generator)

    # Per-fold training vs. validation accuracy curves.
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=history.history['accuracy'],
                      mode='lines+markers',
                      name='Training Accuracy'))
    fig.add_trace(go.Scatter(y=history.history['val_accuracy'],
                      mode='lines+markers',
                      name='Validation Accuracy'))
    fig.update_yaxes(range=[0.40, 1])
    fig.update_layout(
      title="Fold "+ str(fold_var)+ "'s performance",
      title_x=0.5,
      xaxis_title="Epochs",
      yaxis_title="Accuracy",
      autosize=False,
      width=800,
      height=400,
      margin=dict(l=20, r=20, t=40, b=20),)
    fig.show()

    # Reload the best checkpoint and score it on this fold's validation split.
    # The validation generator already uses `image_size`, unlike the global
    # test_set, which is resized independently of this function's argument.
    model.load_weights(checkpoint_path)
    results = model.evaluate(valid_data_generator, return_dict=True)

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])

    # Release graph/session state before the next fold builds a new model.
    tf.keras.backend.clear_session()

  return {'val_accuracy': VALIDATION_ACCURACY, 'val_loss': VALIDATION_LOSS}

# Model Training


In [None]:
# 5-fold cross-validation of the V1 CNN: 15 epochs per fold, 48x48 inputs.
model_test(5, 15, create_model_V1, (48,48))

Found 22967 validated image filenames belonging to 7 classes.
Found 5742 validated image filenames belonging to 7 classes.
Epoch 1/15
10/90 [==>...........................] - ETA: 1:02:31 - loss: 19.9550 - accuracy: 0.6761

KeyboardInterrupt: ignored

# Transfer Learning Model Creation


In [None]:
def transfer_learning_model_resnet_50():
  """Build a 7-class classifier on top of an ImageNet-pretrained ResNet50.

  The input is 224x224x3 and is passed through ResNet50's own
  `preprocess_input`. The first 130 layers of the backbone are frozen and the
  remaining ones are left trainable. The head is global average pooling,
  dropout (0.5) and a 7-way softmax.

  Returns:
    An uncompiled tf.keras.Model.
  """
  resnet = tf.keras.applications.resnet50
  img_shape = (224, 224, 3)
  freeze_until = 130

  inputs = tf.keras.Input(shape=img_shape)
  preprocessed = resnet.preprocess_input(inputs)

  backbone = resnet.ResNet50(input_shape=img_shape,
                             include_top=False,
                             weights='imagenet')
  print(len(backbone.layers))

  # Unfreeze everything, then re-freeze the layers below the cut-off index.
  backbone.trainable = True
  for frozen_layer in backbone.layers[:freeze_until]:
    frozen_layer.trainable = False

  features = backbone(preprocessed)
  pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
  dropped = tf.keras.layers.Dropout(0.5)(pooled)
  outputs = tf.keras.layers.Dense(7, activation="softmax")(dropped)
  return tf.keras.Model(inputs, outputs)

# Build the ResNet50 transfer-learning model (uses the ImageNet backbone weights).
transfer_model_resnet_50 = transfer_learning_model_resnet_50()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
175


## Model Testing

In [None]:
# Restore previously saved weights into the transfer model
# (presumably the fold-2 checkpoint, going by the file name - confirm).
transfer_model_resnet_50.load_weights(DATASET_02+'/Saved_Kaggle_CNN_Weights/transfer_learning_model_resnet_50_model_2.h5')

In [None]:
# Model outputs for every test image; test_set was created with shuffle=False.
predictions = transfer_model_resnet_50.predict(test_set)  


In [None]:
# Predicted class index per image = position of the largest output along the last axis.
im_class = tf.argmax(predictions, axis=-1) #either tf.math.argmax() or tf.argmax will work
print(im_class)

tf.Tensor([5 0 5 ... 6 6 6], shape=(7178,), dtype=int64)


In [None]:
# Ground-truth class indices of the test images, for comparison with im_class.
test_set.classes

array([0, 0, 0, ..., 6, 6, 6], dtype=int32)

In [None]:
def weight_testing(model_v, weight_loc):
  """Load saved weights into a model and report its performance on `test_set`.

  Prints the confusion matrix, the per-class accuracy (diagonal of the
  row-normalised confusion matrix) and sklearn's classification report, then
  computes a ROC curve and its AUC.

  Args:
    model_v: Keras model whose architecture matches the weights file.
    weight_loc: Path of the weights file to load into `model_v`.

  Returns:
    Tuple (fpr, tpr, auc). For a multi-class (softmax) model this is the
    micro-averaged one-vs-rest ROC; for a single-output (sigmoid) model it is
    the ordinary binary ROC.

  Raises:
    ValueError: If a multi-class model's output width differs from the number
      of classes in `test_set`.
  """
  model_v.load_weights(weight_loc)

  # test_set must be unshuffled so prediction rows line up with test_set.classes.
  predictions = np.asarray(model_v.predict(test_set))
  y_true = np.asarray(test_set.classes)

  # Class names ordered by the index the data iterator assigned to each of them.
  target_names = sorted(test_set.class_indices, key=test_set.class_indices.get)
  n_classes = len(target_names)
  class_ids = np.arange(n_classes)

  if predictions.ndim == 2 and predictions.shape[1] > 1:
    if predictions.shape[1] != n_classes:
      raise ValueError(
          f"model outputs {predictions.shape[1]} classes but test_set has {n_classes}")
    predict_final = predictions.argmax(axis=-1)
    # Micro-average: treat every (sample, class) pair as one binary decision.
    roc_truth = np.eye(n_classes, dtype=int)[y_true].ravel()
    roc_score = predictions.ravel()
  else:
    # Single sigmoid output: threshold at 0.5 for the hard prediction.
    roc_score = predictions.ravel()
    predict_final = (roc_score > 0.5).astype(int)
    roc_truth = y_true

  conf_mat = confusion_matrix(y_true, predict_final, labels=class_ids)
  row_totals = conf_mat.sum(axis=1, keepdims=True)
  # Classes with no true samples get accuracy 0 instead of a division by zero.
  conf_mat_perc = np.divide(conf_mat, row_totals,
                            out=np.zeros(conf_mat.shape, dtype=float),
                            where=row_totals != 0)
  class_acc = conf_mat_perc.diagonal()

  print(conf_mat)
  for class_name, accuracy in zip(target_names, class_acc):
    print(f"{class_name.capitalize()} accuracy: {accuracy}")
  print(classification_report(y_true, predict_final,
                              labels=class_ids, target_names=target_names))

  fpr_keras, tpr_keras, thresholds_keras = roc_curve(roc_truth, roc_score)
  auc_keras = auc(fpr_keras, tpr_keras)
  return fpr_keras, tpr_keras, auc_keras


In [None]:
# Hard predictions: index of the largest output for each test image.
predict_final = tf.math.argmax(predictions, axis=-1)

# Rows are true classes; dividing each row by its total turns the diagonal
# into the per-class accuracy.
conf_mat = confusion_matrix(test_set.classes, predict_final)
row_totals = conf_mat.sum(axis=1, keepdims=True)
conf_mat_perc = conf_mat.astype(float) / row_totals
class_acc = np.diagonal(conf_mat_perc)
print(class_acc)


[0.45093946 0.51351351 0.45117188 0.82976325 0.59042985 0.6904571
 0.70998797]


In [None]:
import os
# Class names in the order used for class indices. os.listdir returns entries
# in arbitrary order and may include stray files, so keep only directories and
# sort them; this matches how flow_from_directory orders its classes and keeps
# the names aligned with test_set.classes and the confusion-matrix rows.
my_list = sorted(
    entry for entry in os.listdir(DATASET_02_TEST)
    if os.path.isdir(os.path.join(DATASET_02_TEST, entry))
)
print(my_list)

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [None]:
# Per-class accuracy, one line per class name.
for idx, class_name in enumerate(my_list):
  print(f'{class_name.capitalize()} accuracy: {class_acc[idx]}')

# Raw confusion matrix followed by precision / recall / F1 for each class.
print(conf_mat)
report = classification_report(test_set.classes, predict_final, target_names=my_list)
print(report)

Angry accuracy: 0.4509394572025052
Disgust accuracy: 0.5135135135135135
Fear accuracy: 0.451171875
Happy accuracy: 0.8297632468996617
Neutral accuracy: 0.5904298459042985
Sad accuracy: 0.6904570970328789
Surprise accuracy: 0.7099879663056559
[[ 432    8  116   43   94  251   14]
 [  22   57    7    6    1   16    2]
 [  81    2  462   39   82  305   53]
 [  30    1   36 1472   78  129   28]
 [  49    1   57   66  728  320   12]
 [  75    5   93   51  150  861   12]
 [  24    0   94   50   37   36  590]]
              precision    recall  f1-score   support

       angry       0.61      0.45      0.52       958
     disgust       0.77      0.51      0.62       111
        fear       0.53      0.45      0.49      1024
       happy       0.85      0.83      0.84      1774
     neutral       0.62      0.59      0.61      1233
         sad       0.45      0.69      0.54      1247
    surprise       0.83      0.71      0.77       831

    accuracy                           0.64      7178
   