In [None]:
# === Imports === #

import tensorflow as tf
from tensorflow.keras.models import load_model

from tqdm import tqdm

# Various python packages are used in this notebook. Please get yourself used to them (optional).
import pandas as pd  # used for storing a tabular representation of the dataset, similar to XLS files.
from pathlib import Path # used to check if the saved model files and accessories.
import requests #used to request remote judge.csv evaluation 
from sklearn.preprocessing import StandardScaler  # used for normalization of dataset
from sklearn.preprocessing   import LabelBinarizer    # used for splitting the gender column
from sklearn.preprocessing   import MinMaxScaler      # used for normalization of dataset
from sklearn.model_selection import train_test_split  # used for performing the train-test split of a dataframe
import cv2                                            # OpenCV used for image processing
import random   #random number generator
import datetime #used to get current date/time
import math     #math/numerical functions
import os       #os specific functions, like file open/close etc.
import gc       #garbage collection module -- used to manually clean up memory spaces/references.


from sklearn.preprocessing import OneHotEncoder   #My favorite categorical to numerical feature conversion tool
from tensorflow import keras  # keras used for construction of the Artificial neural network
from keras.models import Model, Sequential #keras model architectures
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D #types of layers
from keras.losses import mean_squared_error, huber, log_cosh  #built-in loss 
from tensorflow.python.keras.saving import hdf5_format  #used for saving models 
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard  #callbacks
from keras.models import model_from_json  #used for loading model architecture from json file
import h5py  #saved model type

import matplotlib.pyplot as plt  # used for training visualization
import numpy as np  # numpy arrays used for matrix computations

# === Extra Configurations for the GPU Environment === #
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
# Enable on-demand memory allocation on every visible GPU, not only the first:
# TensorFlow requires the memory-growth setting to be the same on all GPUs.
try:
    for gpu in physical_devices:  # empty list on CPU-only machines -> nothing to do
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be set before the GPUs are initialized, so this is
    # raised when the cell is re-run in a live kernel. Report it and carry on.
    print(err)

# https://github.com/zaid478/Transfer-Learning-from-Xception-Model-in-Keras-/blob/master/transfer_learn.py

from keras.applications import xception
from keras import backend as K
from keras.utils import np_utils

from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

In [None]:
"""
Setting work environment with dataset. If on Google colaboratory, we need to extract dataset stored in google drive,
otherwise the dataset is already there.
"""
# Only a missing `google.colab` package means "not on Colab". A failure inside
# drive.mount() is a real error and must surface instead of being reported as
# "running on local machine".
try:
    from google.colab import drive
except ImportError:
    print('Running on local machine...')
else:
    print('Running on Google colab...')
    drive.mount('/content/drive')

Running on Google colab...
Mounted at /content/drive


# IMPORTANT VARIABLES

In [None]:
# Dataset folders on the mounted Google Drive (relative to the working directory).
train_path = 'drive/MyDrive/Colab Notebooks/DAiSEE/gen-full-4/Train/'
test_path = 'drive/MyDrive/Colab Notebooks/DAiSEE/gen-full-4/Test/'

# Input shape handed to Xception; its first two entries are also used as the
# data generator's `dim`.
image_shape = (224, 299, 3) # HEIGHT, WIDTH, CHANNELS

# Train a new Xception model

In [None]:
# ImageNet-pretrained Xception used as a feature extractor (no classifier top).
# `classes` and `classifier_activation` are intentionally not passed: Keras only
# uses them when include_top=True, so they had no effect here and wrongly
# suggested that this call builds the 2-class head.
xception_tl = tf.keras.applications.Xception(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=image_shape,
    pooling=None,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze every layer of the pretrained base so its weights are not updated.
for layer in xception_tl.layers:
    layer.trainable = False

In [None]:
# Classification head stacked on the Xception feature maps:
# global average pooling -> 128-unit ReLU layer -> 2-way softmax.
x = GlobalAveragePooling2D()(xception_tl.output)
x = Dense(128, activation='relu', name='learning_layer')(x)
y1 = Dense(2, activation='softmax', name='y1')(x)

# Full model: from the Xception input through to the new softmax output.
xception_tl_DAiSEE = Model(inputs=xception_tl.input, outputs=y1)

In [None]:
# Inspect the model's output tensor (the 2-way softmax produced by layer 'y1').
xception_tl_DAiSEE.output

<KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'y1')>

Reference for the Keras data-generator pattern used in the next cell: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads image batches from disk with OpenCV.

    Parameters
    ----------
    path : str
        Directory that contains the image files.
    list_IDs : sequence of str
        Image file names, relative to ``path``.
    labels : sequence
        Aligned with ``list_IDs``; element ``[1]`` of each entry is the score the
        target is built from (presumably the DAiSEE engagement level -- confirm
        against the label CSV).
    batch_size : int
        Number of samples per batch.
    dim : tuple
        ``(height, width)`` every image file must already have; no resizing is done.
    n_channels : int
        Number of channels of the batch array (``cv2.imread`` yields 3).
    n_classes : int
        Width of the one-hot target. With ``2`` the score is binarized
        (``score >= 2`` -> class 1); with any other value the raw score is used
        as the class index.
    shuffle : bool
        Reshuffle the sample order when the generator is created and after
        every epoch.
    """

    def __init__(self, path, list_IDs, labels, batch_size=32, dim=(480,640), n_channels=3,
                 n_classes=4, shuffle=True):
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.path = path
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(batch_indexes)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_indexes):
        """Load the samples at ``batch_indexes``.

        Returns
        -------
        X : np.ndarray, shape (n_samples, *dim, n_channels), float32
            Images after Xception preprocessing.
        y : tf.Tensor, shape (n_samples, n_classes), float32
            One-hot targets.

        Raises
        ------
        OSError
            If an image file is missing or cannot be decoded.
        ValueError
            If a class index falls outside ``[0, n_classes)``.
        """
        n_samples = len(batch_indexes)
        # float32 is what the network consumes; float64 would only double the memory.
        X = np.empty((n_samples, *self.dim, self.n_channels), dtype='float32')
        y = np.zeros((n_samples, self.n_classes), dtype='float32')

        for row, sample_index in enumerate(batch_indexes):
            filename = os.path.join(self.path, self.list_IDs[sample_index])
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError(f"cv2.imread could not read image: {filename}")
            # OpenCV decodes to BGR, while the ImageNet weights expect RGB input.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            X[row] = tf.keras.applications.xception.preprocess_input(image)

            score = self.labels[sample_index][1]
            # Binary setup: scores 0-1 -> class 0, scores >= 2 -> class 1.
            class_index = int(score >= 2) if self.n_classes == 2 else int(score)
            if not 0 <= class_index < self.n_classes:
                raise ValueError(
                    f"class index {class_index} out of range for n_classes={self.n_classes} "
                    f"(file {filename})"
                )
            y[row, class_index] = 1.0

        return X, tf.convert_to_tensor(y)

In [None]:
'''
import numpy as np
y = [3, 2, 1, 2, 3, 1, 0, 0 ,1]
y = [1 if  i>=2 else 0 for i in y]
print(y)
'''

'\nimport numpy as np\ny = [3, 2, 1, 2, 3, 1, 0, 0 ,1]\ny = [1 if  i>=2 else 0 for i in y]\nprint(y)\n'

In [None]:
# Alternative method for loading the dataset: copy the tar archive from Google
# Drive to local storage and extract it there.

import os
import shutil
import tarfile

tar_name = 'gen-full-4.tar'
tar_path = 'drive/MyDrive/Colab Notebooks/DAiSEE/' + tar_name
local_tar_path = os.path.join('/content/', tar_name)

temp_data_location = 'temp_data'

# Start from an empty extraction directory so files from an earlier run cannot linger.
if os.path.isdir(temp_data_location):
    shutil.rmtree(temp_data_location)
os.mkdir(temp_data_location)

shutil.copyfile(tar_path, local_tar_path)

# The context manager closes the archive even if extraction fails part-way.
# NOTE: extractall() trusts the member paths stored in the archive; only use it
# on archives you created yourself.
with tarfile.open(local_tar_path) as tar:
    tar.extractall(path=temp_data_location)


In [None]:
# Train/Test folders inside the local extraction directory.
local_train_path, local_test_path = (
    os.path.join(temp_data_location, split_name) for split_name in ('Train', 'Test')
)

In [None]:
#shutil.copyfile(tar_path, os.path.join('/content/', tar_name)) 

In [None]:
'''

import os
import shutil
#os.getcwd()

temp_data_location = 'temp_data'

if temp_data_location in os.listdir(os.getcwd()):
    shutil.rmtree(temp_data_location)

os.mkdir(temp_data_location)

#assert temp_data_location not in os.listdir(os.getcwd())


local_train_path = os.path.join(temp_data_location, 'Train')
local_test_path = os.path.join(temp_data_location, 'Test') 

# Copytree makes the folders
#os.mkdir(local_train_path)
#os.mkdir(local_test_path)

shutil.copytree(train_path, local_train_path) 
shutil.copytree(test_path, local_test_path)
'''

"\n\nimport os\nimport shutil\n#os.getcwd()\n\ntemp_data_location = 'temp_data'\n\nif temp_data_location in os.listdir(os.getcwd()):\n    shutil.rmtree(temp_data_location)\n\nos.mkdir(temp_data_location)\n\n#assert temp_data_location not in os.listdir(os.getcwd())\n\n\nlocal_train_path = os.path.join(temp_data_location, 'Train')\nlocal_test_path = os.path.join(temp_data_location, 'Test') \n\n# Copytree makes the folders\n#os.mkdir(local_train_path)\n#os.mkdir(local_test_path)\n\nshutil.copytree(train_path, local_train_path) \nshutil.copytree(test_path, local_test_path)\n"

In [None]:
# temp solution
#shutil.copytree(local_train_path, local_test_path)

In [None]:
# Generator Parameters
params = {'dim': (image_shape[0], image_shape[1]),
          'batch_size': 64,
          'n_classes': 2,
          'n_channels': 3,
          'shuffle': True}

folder_path = './drive/MyDrive/Colab Notebooks/DAiSEE/'


def load_label_table(csv_name):
    """Read ``Labels/<csv_name>`` under ``folder_path`` and append an ``ID_num`` column.

    ``ID_num`` is ``ClipID`` without its last four characters (presumably the
    file extension -- confirm against the CSV).
    """
    table = pd.read_csv(os.path.join(folder_path, 'Labels/' + csv_name))
    table['ID_num'] = table['ClipID'].str[:-4]
    return table


def build_label_lookup(label_table):
    """Map every ``ID_num`` to its label values.

    The label values are all columns except the first (``ClipID``) and the last
    (``ID_num``). When an ID occurs more than once, the first row wins.
    """
    label_rows = label_table.iloc[:, 1:-1].values.tolist()
    lookup = {}
    for id_num, row in zip(label_table['ID_num'], label_rows):
        lookup.setdefault(id_num, row)
    return lookup


def build_split(image_dir, label_lookup, description):
    """Pair every image file in ``image_dir`` with its label vector.

    The sample ID is the part of the file name before the first ``-``. Files
    without a ``-`` or without a matching label row are skipped and counted.

    Returns
    -------
    filenames : np.ndarray of object, shape (n_matched,)
    labels : np.ndarray of object, shape (n_matched,)
        Each element is the label vector (``np.ndarray``) of the file at the
        same position in ``filenames``.

    Raises
    ------
    ValueError
        If no file in ``image_dir`` could be matched to a label row.
    """
    print(description)
    matched = []
    skipped = 0
    for filename in tqdm(os.listdir(image_dir)):
        sample_ID, dash, _ = filename.partition('-')
        label = label_lookup.get(sample_ID) if dash else None
        if label is None:
            skipped += 1
            continue
        matched.append((filename, np.array(label)))

    if skipped:
        print(f"skipped {skipped} file(s) without a matching label in {image_dir}")
    if not matched:
        raise ValueError(f"no labelled image files found in {image_dir}")

    filenames = np.array([name for name, _ in matched], dtype=object)
    # Fill element by element so that every entry stays a separate label vector.
    labels = np.empty(len(matched), dtype=object)
    for position, (_, label) in enumerate(matched):
        labels[position] = label
    return filenames, labels


all_labels = load_label_table('AllLabels.csv')
train_labels = load_label_table('TrainLabels.csv')
test_labels = load_label_table('TestLabels.csv')

# One dictionary lookup per file instead of one DataFrame scan per file.
# Both splits are labelled from AllLabels.csv.
label_lookup = build_label_lookup(all_labels)

# Train Set
X_train, y_train = build_split(local_train_path, label_lookup, "building train set")

# Test Set
X_test, y_test = build_split(local_test_path, label_lookup, "building test set")

building train set


100%|██████████| 38374/38374 [00:39<00:00, 972.75it/s]


building test set


100%|██████████| 13062/13062 [00:13<00:00, 1003.95it/s]


In [None]:
# Display the test labels: an object array holding one label vector per matched test file.
y_test

array([array([0, 3, 0, 0]), array([2, 2, 1, 1]), array([1, 2, 0, 0]), ...,
       array([2, 2, 0, 1]), array([1, 1, 0, 0]), array([0, 3, 0, 0])],
      dtype=object)

In [None]:
# Number of test files that were matched to a label row.
y_test.shape

(12061,)

In [None]:
# Batch generators for the two splits; both read from the locally extracted
# dataset and share the settings in `params`.
# NOTE(review): `params` enables shuffling for the validation generator as well --
# confirm that this is intended.
training_generator = DataGenerator(path=local_train_path, list_IDs=X_train, labels=y_train, **params)
validation_generator = DataGenerator(path=local_test_path, list_IDs=X_test, labels=y_test, **params)

In [None]:
'''
class_weight = {0: 75.,
                1: 10.,
                2: 1.,
                3: 1.}

class_weight = {0: 1.,
                1: 1.,
                2: 1.,
                3: 1.}
'''

# Per-class loss weights passed to fit(); both classes are currently weighted equally.
#class_weight = {0: 2., 1: 1.}
class_weight = {0: 1., 1: 1.}

In [None]:
%reload_ext tensorboard
model_path = os.path.join(folder_path, 'saved_models/model_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + '_.sav')
log_dir = os.path.join(folder_path, "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

tensorboard_cbk = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_cbk = EarlyStopping(monitor='val_accuracy', patience=10, verbose=0, mode='min')
mcp_save_cbk = ModelCheckpoint(model_path+'.mcp.hdf5', save_best_only=True, monitor='val_accuracy', mode='min')
reduce_lr_plateau_cbk = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, mode='min')
#callbacks = [early_stopping_cbk, mcp_save_cbk, reduce_lr_plateau_cbk, tensorboard_cbk]
callbacks = [mcp_save_cbk, reduce_lr_plateau_cbk, tensorboard_cbk]

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and three
# metrics reported for every epoch.
xception_tl_DAiSEE.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['cosine_similarity', 'mean_absolute_error', 'accuracy'],
)

In [None]:
# The batch size is defined by the generators (params['batch_size']). Keras
# documents that `batch_size` must not be passed to fit() for Sequence input,
# so it is no longer passed here.
history = xception_tl_DAiSEE.fit(
    training_generator,
    validation_data=validation_generator,
    class_weight=class_weight,
    epochs=50,
    callbacks=callbacks,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 16: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 23: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 30: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 37: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 44: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 45/50
Epoc

# END