In [1]:
# === Imports === #

import tensorflow as tf
from tensorflow.keras.models import load_model

from tqdm import tqdm

# Various python packages are used in this notebook. Please get yourself used to them (optional).
import pandas as pd  # used for storing a tabular representation of the dataset, similar to XLS files.
from pathlib import Path # used to check if the saved model files and accessories.
import requests #used to request remote judge.csv evaluation 
from sklearn.preprocessing import StandardScaler  # used for normalization of dataset
from sklearn.preprocessing   import LabelBinarizer    # used for splitting the gender column
from sklearn.preprocessing   import MinMaxScaler      # used for normalization of dataset
from sklearn.model_selection import train_test_split  # used for performing the train-test split of a dataframe
import cv2                                            # OpenCV used for image processing
import random   #random number generator
import datetime #used to get current date/time
import math     #math/numerical functions
import os       #os specific functions, like file open/close etc.
import gc       #garbage collection module -- used to manually clean up memory spaces/references.


from sklearn.preprocessing import OneHotEncoder   #My favorite categorical to numerical feature conversion tool
from tensorflow import keras  # keras used for construction of the Artificial neural network
from keras.models import Model, Sequential #keras model architectures
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D #types of layers
from keras.losses import mean_squared_error, huber, log_cosh  #built-in loss 
from tensorflow.python.keras.saving import hdf5_format  #used for saving models 
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard  #callbacks
from keras.models import model_from_json  #used for loading model architecture from json file
import h5py  #saved model type

import matplotlib.pyplot as plt  # used for training visualization
import numpy as np  # numpy arrays used for matrix computations

# === Extra Configurations for the GPU Environment === #
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU') 
if len(physical_devices)>0: #If you have at least one "configured" GPU, let's use it; otherwise, pass
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# https://github.com/zaid478/Transfer-Learning-from-Xception-Model-in-Keras-/blob/master/transfer_learn.py

from keras.applications import xception
from keras import backend as K
from keras.utils import np_utils

from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

In [2]:
"""
Setting work environment with dataset. If on Google colaboratory, we need to extract dataset stored in google drive,
otherwise the dataset is already there.
"""
try:
    from google.colab import drive
    print('Running on Google colab...')
    drive.mount('/content/drive')
except:
    print('Running on local machine...')

Running on Google colab...
Mounted at /content/drive


# IMPORTANT VARIABLES

In [3]:
train_path = 'drive/MyDrive/Colab Notebooks/DAiSEE/gen-full-4/Train/'
test_path = 'drive/MyDrive/Colab Notebooks/DAiSEE/gen-full-4/Test/'

image_shape = (224, 299, 3) # HEIGHT, WIDTH, CHANNELS

# Train a new Xception model

In [4]:
xception_tl = tf.keras.applications.Xception(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=image_shape,
    pooling=None,
    classes=4,
    classifier_activation="softmax",
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [5]:
for layer in xception_tl.layers:
	layer.trainable=False

In [6]:
x = xception_tl.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu', name='learning_layer')(x)
y1 = Dense(4, activation='softmax', name='y1')(x)

xception_tl_DAiSEE=Model(xception_tl.input, y1)

In [7]:
xception_tl_DAiSEE.output

<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'y1')>

https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [8]:
class DataGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, path, list_IDs, labels, batch_size=32, dim=(480,640), n_channels=3,
                 n_classes=4, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.path = path
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size, 4), dtype='float32')

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            filename = os.path.join(self.path, self.list_IDs[ID])
            x = cv2.imread(filename)
            X[i,] = tf.keras.applications.xception.preprocess_input(x)
            y[i,:] = tf.one_hot(self.labels[ID][1], depth=4)

        return X, tf.convert_to_tensor(y)

In [10]:
import os
import shutil
#os.getcwd()

temp_data_location = 'temp_data'

if temp_data_location in os.listdir(os.getcwd()):
    shutil.rmtree(temp_data_location)

os.mkdir(temp_data_location)

#assert temp_data_location not in os.listdir(os.getcwd())


local_train_path = os.path.join(temp_data_location, 'train')
local_test_path = os.path.join(temp_data_location, 'test') 

# Copytree makes the folders
#os.mkdir(local_train_path)
#os.mkdir(local_test_path)

shutil.copytree(train_path, local_train_path) 
shutil.copytree(test_path, local_test_path)

'temp_data/test'

In [11]:
# Generator Parameters
params = {'dim': (image_shape[0], image_shape[1]),
          'batch_size': 64,
          'n_classes': 4,
          'n_channels': 3,
          'shuffle': True}

folder_path = './drive/MyDrive/Colab Notebooks/DAiSEE/'
all_labels = pd.read_csv(os.path.join(folder_path, 'Labels/AllLabels.csv'))
all_labels['ID_num'] = all_labels['ClipID'].str[:-4]

train_labels = pd.read_csv(os.path.join(folder_path, 'Labels/TrainLabels.csv'))
train_labels['ID_num'] = train_labels['ClipID'].str[:-4]

test_labels = pd.read_csv(os.path.join(folder_path, 'Labels/TestLabels.csv'))
test_labels['ID_num'] = test_labels['ClipID'].str[:-4]

# Train Set
file_labels = []
print("building train set")
for filename in tqdm(os.listdir(local_train_path)):
    try:
        sample_ID = filename[:filename.index('-')]
        label = all_labels[all_labels['ID_num']==sample_ID].values.tolist()[0][1:-1]
        file_labels.append((filename, np.array(label)))
    except IndexError:
        print(sample_ID)

label_arr = np.array(file_labels, dtype=object)
X_train = label_arr[:, 0]
y_train = label_arr[:, 1]

# Test Set
file_labels = []
print("building test set")
for filename in tqdm(os.listdir(local_test_path)):
    try:
        sample_ID = filename[:filename.index('-')]
        
        label = test_labels[test_labels['ID_num']==sample_ID].values.tolist()[0][1:-1]
        file_labels.append((filename, np.array(label)))
    except IndexError:
        print(sample_ID)

label_arr = np.array(file_labels, dtype=object)
X_test = label_arr[:, 0]
y_test = label_arr[:, 1]

building train set


100%|██████████| 38374/38374 [00:36<00:00, 1046.76it/s]


building test set


  3%|▎         | 358/13062 [00:00<00:07, 1784.94it/s]

9988260264
9988260229
9988260212
9988260215
9988260127
9988260126
9988260250
9988260276
9988260146
998826021
9988260141
9988260137
998826014
9988260138


  4%|▍         | 537/13062 [00:00<00:07, 1710.42it/s]

9988260236
998826016
9988260224
9988260150
9988260127
9988260145
998826019
9988260243
9988260249


  7%|▋         | 888/13062 [00:00<00:07, 1735.71it/s]

9988260250
9988260216
9988260160
9988260135
9988260279
998826026
9988260139
9988260250
9988260233
9988260133
9988260134
9988260228
9988260123
9988260154
9988260163
9988260246
9988260243
9988260259


 11%|█         | 1423/13062 [00:00<00:06, 1731.21it/s]

9988260154
9988260152
9988260212
9988260259
9988260228
998826014
9988260214
9988260264
9988260210
9988260254
9988260277
9988260167
9988260144
9988260243
9988260248
9988260234
9988260123
9988260275
9988260254
9988260273
9988260251
9988260160
9988260237
9988260130
9988260247
9988260273
9988260138
9988260141
9988260236


 15%|█▌        | 1968/13062 [00:01<00:06, 1767.37it/s]

998826024
9988260144
9988260134
9988260264
9988260143
9988260139
9988260139
9988260141
9988260167
9988260240
9988260121
9988260246
9988260224
9988260269
998826024
9988260132
9988260143
9988260154
998826016
9988260146
998826026
998826017
9988260132
9988260279


 19%|█▉        | 2510/13062 [00:01<00:05, 1777.75it/s]

9988260248
9988260129
998826019
9988260259
9988260246
9988260129
9988260237
9988260279
9988260211
9988260273
9988260233
9988260144
998826013
9988260212
9988260135
9988260127
9988260211
9988260222
9988260144
9988260247
9988260250
9988260249


 23%|██▎       | 3056/13062 [00:01<00:05, 1794.17it/s]

9988260277
9988260257
998826019
9988260152
998826017
9988260232
998826017
9988260121
998826024
998826017
9988260249
998826022
9988260231
9988260241
9988260269
9988260141
9988260230


 28%|██▊       | 3594/13062 [00:02<00:05, 1769.10it/s]

9988260254
9988260246
9988260232
9988260143
9988260165
9988260215
9988260237
9988260121
998826019
9988260268
9988260216
9988260150
9988260144
9988260255
9988260243
9988260235
998826024
9988260233
9988260229
9988260241
9988260233
998826014
9988260241
9988260214
9988260217
9988260229
9988260259
9988260163
9988260150
9988260137
9988260150


 32%|███▏      | 4135/13062 [00:02<00:05, 1775.43it/s]

9988260251
9988260146
998826016
9988260132
9988260237
998826026
9988260163
9988260255
9988260250
9988260123
9988260157
9988260242
9988260275
998826022
9988260210
9988260276
9988260216
9988260234
9988260254
9988260268
9988260229
9988260247


 36%|███▌      | 4668/13062 [00:02<00:04, 1758.33it/s]

998826022
9988260216
9988260154
9988260152
9988260211
9988260216
998826019
9988260230
9988260217
9988260214
9988260222
9988260235
998826013
9988260255
9988260212
9988260237
9988260215
9988260157
9988260152
9988260152
9988260241
9988260268
9988260270


 40%|███▉      | 5199/13062 [00:02<00:04, 1727.90it/s]

9988260233
9988260239
9988260121
9988260211
9988260242
9988260250
9988260141
9988260157
998826021
9988260138
9988260227
998826016
9988260157
9988260281
9988260273
9988260235
9988260279
9988260277
9988260273
9988260235
9988260160
9988260257


 44%|████▍     | 5737/13062 [00:03<00:04, 1767.97it/s]

9988260146
998826019
9988260246
9988260150
9988260248
998826023
9988260160
9988260269
9988260145
9988260257
9988260222
9988260154
998826023
998826017
9988260241
9988260210
9988260240
9988260123


 47%|████▋     | 6097/13062 [00:03<00:03, 1777.15it/s]

9988260163
998826014
9988260234
9988260268
9988260232
9988260211
9988260222
998826014
9988260217
9988260254
9988260255
9988260214
9988260242
9988260251
9988260228
9988260245
9988260145
9988260241
9988260212
9988260165

 52%|█████▏    | 6815/13062 [00:03<00:03, 1782.05it/s]


9988260139
9988260251
9988260165
9988260165
9988260235
9988260237
9988260259
998826024
9988260134
9988260281
9988260245
998826017
9988260275
9988260165
9988260216
9988260234
9988260249
9988260134
9988260222
9988260227
9988260123
9988260264
9988260268


 56%|█████▌    | 7345/13062 [00:04<00:03, 1731.55it/s]

9988260127
9988260129
998826013
9988260211
9988260268
9988260121
9988260137
9988260240
9988260248
9988260215
9988260132
9988260141
9988260234
9988260228
9988260240
9988260157
9988260139
9988260251
9988260138
9988260232
9988260273


 60%|██████    | 7878/13062 [00:04<00:02, 1734.07it/s]

9988260214
9988260247
9988260217
9988260243
9988260249
9988260130
9988260217
9988260231
9988260126
9988260144
998826026
9988260143
9988260255
998826017
9988260132
9988260235
9988260154
9988260145
9988260134
9988260242
9988260275
998826023
9988260127
9988260150
9988260276


 63%|██████▎   | 8238/13062 [00:04<00:02, 1758.13it/s]

9988260146
9988260269
9988260229
998826023
9988260276
9988260277
9988260264
9988260134
9988260137
9988260134
9988260250
9988260257
9988260230
998826023
9988260210
9988260227
9988260157
9988260135
9988260139
9988260281
9988260236


 68%|██████▊   | 8947/13062 [00:05<00:02, 1757.23it/s]

998826013
9988260163
9988260273
9988260230
9988260231
9988260145
998826014
9988260217
9988260237
9988260232
9988260277
998826013
9988260222
9988260239
9988260167
998826022
9988260245
9988260227
9988260157
9988260138
9988260233
9988260143


 74%|███████▍  | 9664/13062 [00:05<00:01, 1769.80it/s]

9988260269
9988260264
9988260254
9988260242
9988260167
9988260135
9988260247
9988260243
9988260123
9988260210
9988260279
9988260211
9988260127
9988260133
9988260214
9988260138
9988260236
9988260163
9988260212
9988260133
998826016
9988260126
9988260129
9988260279
9988260224
9988260275
9988260165
9988260154
9988260230
9988260249
998826023
9988260217


 77%|███████▋  | 10018/13062 [00:05<00:01, 1706.90it/s]

9988260247
9988260255
9988260129
998826021
9988260165
9988260210
9988260281
9988260150
998826021
9988260215
9988260126
9988260139
9988260269
9988260233
9988260123
9988260257
9988260138
9988260135
9988260228
9988260231
9988260210
9988260145
9988260248
9988260137


 81%|████████  | 10541/13062 [00:06<00:01, 1668.24it/s]

9988260152
9988260163
9988260245
9988260143
9988260279
9988260231
9988260229
998826022
9988260259
998826016
9988260228
9988260126
9988260130
9988260133
9988260239
9988260234
9988260241
9988260141
9988260224
9988260268
9988260270
9988260160
9988260227
9988260270
9988260251


 86%|████████▌ | 11245/13062 [00:06<00:01, 1742.97it/s]

9988260235
9988260246
9988260270
9988260145
9988260146
9988260132
9988260229
9988260239
9988260270
9988260215
9988260224
9988260212
9988260276
9988260216
9988260248
9988260236
998826026
9988260143
9988260167
9988260228
9988260127
9988260275
9988260146
9988260276
9988260121
9988260227
9988260231
9988260130
9988260137
9988260240
9988260214


 92%|█████████▏| 11964/13062 [00:06<00:00, 1764.34it/s]

9988260135
9988260243
9988260121
9988260234
9988260281
9988260152
9988260230
9988260254
9988260230
998826026
9988260277
9988260133
9988260133
9988260281
9988260130
9988260130
9988260240
9988260227
9988260231
9988260160
9988260277
9988260276
9988260245
9988260167
9988260236
998826021
9988260275
9988260126
9988260246
9988260144
998826021
9988260130
9988260239
9988260224
9988260126
9988260257
9988260135


 97%|█████████▋| 12683/13062 [00:07<00:00, 1789.38it/s]

9988260255
9988260259
9988260269
9988260239
9988260240
9988260242
9988260251
9988260257
998826026
998826022
998826021
998826013
998826019
9988260132
9988260167
9988260270
9988260129
9988260281
9988260222
9988260129
9988260270
9988260242
9988260160
9988260247
998826024
9988260248
998826013
9988260236
9988260133
9988260232


100%|██████████| 13062/13062 [00:07<00:00, 1741.53it/s]

998826014
998826016
9988260249
9988260239
9988260224
998826022
998826024
9988260137
9988260232
9988260245
998826023
9988260245
9988260264
9988260215





In [12]:
y_test.shape

(12488,)

In [13]:
# Generators
training_generator = DataGenerator(local_train_path, X_train, y_train, **params)
validation_generator = DataGenerator(local_test_path, X_test, y_test, **params) 

In [14]:
'''
class_weight = {0: 75.,
                1: 10.,
                2: 1.,
                3: 1.}
'''
class_weight = {0: 1.,
                1: 1.,
                2: 1.,
                3: 1.}

In [15]:
%reload_ext tensorboard
model_path = os.path.join(folder_path, 'saved_models/model_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + '_.sav')
log_dir = os.path.join(folder_path, "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

tensorboard_cbk = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_cbk = EarlyStopping(monitor='val_accuracy', patience=10, verbose=0, mode='min')
mcp_save_cbk = ModelCheckpoint(model_path+'.mcp.hdf5', save_best_only=True, monitor='val_accuracy', mode='min')
reduce_lr_plateau_cbk = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, mode='min')
callbacks = [early_stopping_cbk, mcp_save_cbk, reduce_lr_plateau_cbk, tensorboard_cbk]

In [16]:
xception_tl_DAiSEE.compile(loss = 'categorical_crossentropy', optimizer='adam',
                         metrics = ['cosine_similarity', 'mean_absolute_error', 'accuracy'])

In [17]:
history = xception_tl_DAiSEE.fit(training_generator, validation_data=validation_generator, class_weight=class_weight, batch_size=64, epochs=50, callbacks=callbacks)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 22: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 23/50
Epoch 24/50
Epoch 25/50


# END