<a href="https://colab.research.google.com/github/mak109/cs6910_assignment2/blob/main/PART%20A/cs6910_assignment2_partA_question1_2_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training CNN from Scratch

## 1. Packages and imports

In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import random
import datetime
import os
import shutil
from zipfile import ZipFile
from PIL import Image
plt.rcParams["figure.figsize"] = (20,10)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,regularizers,optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
autotune = tf.data.AUTOTUNE
from functools import reduce
import random
import uuid

## 2. UTA-RLDD preprocessed dataset downloaded from kaggle

In [1]:

!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json
api_token = {"username":"","key":""} #Place your kaggle credentials here

import json

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)

!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mak1999/uta-rldd-2

Downloading uta-rldd-2.zip to /kaggle/working
 99%|█████████████████████████████████████▋| 2.38G/2.40G [00:32<00:00, 76.0MB/s]
100%|██████████████████████████████████████| 2.40G/2.40G [00:32<00:00, 78.3MB/s]


In [3]:
filename = 'uta-rldd-2.zip'
with ZipFile(filename, 'r') as z:
    print('Extracting all the files now...')
    z.extractall()
    print('Done!')
os.remove(filename)

Extracting all the files now...
Done!


## 3. Wandb setup for hyperparameter tuning

In [5]:
import wandb
os.environ['WANDB_ENTITY'] = 'ipda526'
os.environ['WANDB_PROJECT'] = 'baseline-drowsiness-detection'
wandb.login(key='') #Place your wandb api key here
from wandb.keras import WandbCallback,WandbMetricsLogger, WandbModelCheckpoint

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


## 4. Model definition and training

In [6]:
image_size = (256,256)
num_layers = 4 #Number of convolution layers
num_dense_layers = 2 #Number of dense or fully connected layers
num_classes = 3 #0 - awake 1-drowsy 2 - low vigilant
train_dir = 'UTA-RLDD/train'
val_dir = 'UTA-RLDD/val'

In [7]:
#CNN model hyperparameters from config
def CNN(config):
    model = Sequential([
        layers.Input((image_size[0],image_size[1],3)),
        layers.experimental.preprocessing.Rescaling(1./255)
    ])
    for l in range(num_layers):
        model.add(layers.Conv2D(filters=config["filters_list"][l],kernel_size=(config["kernel_sizes"][l][0],config["kernel_sizes"][l][1]),
                        activation=config["activation"],padding="same",kernel_regularizer=regularizers.l2(config["weight_decay"])))
        if config["batch_normalization"] == 'True':
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        model.add(layers.Dropout(config["dropout_conv"]))
        
    model.add(layers.Flatten())
    for d in range(num_dense_layers-1):
        model.add(layers.Dense(config["dense_layers"][d],activation=config["activation"],kernel_regularizer=regularizers.l2(config["weight_decay"])))
        model.add(layers.Dropout(config["dropout_dense"]))
    model.add(layers.Dense(config["dense_layers"][num_dense_layers-1],activation=config["activation"],kernel_regularizer=regularizers.l2(config["weight_decay"])))
    model.add(layers.Dense(num_classes,activation="softmax"))
    return model

In [8]:
# tf.debugging.set_log_device_placement(True)
tf.config.set_soft_device_placement(True)

In [9]:
#Training goes here
#Comment out the code related to Wandb if training is done without wandb integration
def train(config_in=None,checkpointing=False):
    config_ = {
    "kernel_sizes" : [(5,5),(3,3),(7,7),(9,9)],
    "activation" : 'elu',
    "learning_rate": 1e-4,
    "filters_list" : [64,32,32,16],
    "dense_layers" : [256,128],
    "batch_normalization": "False",
    "data_augment": "False",
    "weight_decay":0,
    "dropout_conv":0,
    "dropout_dense":0,
    "batch_size":32,
    "epochs":60
    }
    if config_in is not None:
          config = config_in
    else:
          config = config_ #Default Config

    '''Wandb Configs'''
    wandb.init(config=config)
    config = wandb.config
    #Setting run name for better readability
    wandb.run.name = "nd_"+str(len(config["dense_layers"]))+"bs_"+str(config["batch_size"])+"ac_"+str(config["activation"])
    #Removing the temporary train/val dir if existing
    shutil.rmtree(train_dir,ignore_errors=True)
    shutil.rmtree(val_dir,ignore_errors=True)
    x = random.randint(0,4)
    #Pick up a random fold and use it for validation and remaining other 4 folds for training
    for i in range(5):
        if i == x:
            print(f'Copying fold {x+1} to val...')
            shutil.copytree(f'UTA-RLDD/fold{x+1}','UTA-RLDD/val')
        else:
            print(f'Copying fold {i+1} to train...')
            fold = f'fold{i+1}'
            fold_path = os.path.join('UTA-RLDD', fold)
            for subdir, dirs, files in os.walk(fold_path):
                dest_subdir = subdir.replace(fold, 'train')
                if not os.path.exists(dest_subdir):
                    os.makedirs(dest_subdir)
                for file in files:
                    shutil.copy(os.path.join(subdir, file), os.path.join(dest_subdir, file))

    print('Done!')
    #Data Augmentation This can also be validated for better results
    if config["data_augment"] == 'True':
        data_generator = ImageDataGenerator(
        rotation_range=50, #random rotation between -50(clockwise) to 50(anti-clockwise) degree
        brightness_range=(0.2,0.8), 
        zoom_range=0.3, #zoom in range from [0.7,1.3]
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.1, #Horizontal Shifting as a ratio of width
        height_shift_range=0.2,#Vertical Shifting as a ratio of height
        data_format='channels_last'
#         
        )
    else:
        data_generator = ImageDataGenerator(
            data_format='channels_last'
        )
    #Train set creation after conditional augmentation
    train_generator = data_generator.flow_from_directory(
    train_dir,
    target_size = image_size,
    batch_size = config['batch_size'],
    color_mode = 'rgb',
    class_mode = 'sparse',
    shuffle=True,
    seed=123
    )
    val_generator = ImageDataGenerator(data_format='channels_last').flow_from_directory(
        val_dir,
        target_size = image_size,
        batch_size = config['batch_size'],
        color_mode = 'rgb',
        class_mode = 'sparse',
        shuffle=True,
        seed=123
    
    )
    try:
        with tf.device('/device:GPU:0'):
            #Building Model based on config 
            model = CNN(config)
            #Early stopping to prevent overfitting
            early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=5,start_from_epoch=3)
            #Compiling model 
            model.compile(
            optimizer=optimizers.Adam(learning_rate=config["learning_rate"]),
            loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy']
            )
            #For checkpointing default value is False
            if checkpointing == True:
                current_directory = os.getcwd()
                final_directory = os.path.join(current_directory, f'models_{str(uuid.uuid1())}')
                if not os.path.exists(final_directory):
                    os.makedirs(final_directory)
                checkpoint_filepath = final_directory
                model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                  filepath=checkpoint_filepath,
                  save_weights_only=False,
                  monitor='val_accuracy',
                  mode='max',
                  save_best_only=True)
                  #Fitting Model
                history = model.fit(train_generator,
                  validation_data=val_generator,
                  epochs=config["epochs"],
                  verbose=1,
                  # callbacks = [WandbCallback()] #Used with wandb
                  callbacks = [early_stop_callback,model_checkpoint_callback] #Custom callback for checkpointing
                  )
            else:
                history = model.fit(train_generator,
                  validation_data=val_generator,
                  epochs=config["epochs"],
                  verbose=1,#WandbMetricsLogger(log_freq="epoch"),
                  callbacks = [early_stop_callback,WandbCallback(monitor='val_accuracy',mode='auto'),
                               WandbModelCheckpoint(filepath="models",monitor='val_accuracy',verbose=1,save_freq='epoch',mode='max')] #Used with wandb
                  )
            
    except RuntimeError as e:
          print(e)
    wandb.finish()
    shutil.rmtree(train_dir,ignore_errors=True)
    shutil.rmtree(val_dir,ignore_errors=True)
    return history

## Standalone training

In [10]:
history = train()
#Visualization part
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
# plt.savefig('metrics.jpg')
plt.show()

## Wandb integration for hyperparameter tuning

In [None]:
'''Wandb Sweeps '''
#Sweep configuration for runs
sweep_config = {
  "name" : "best-sweep-baseline-folds"+str(uuid.uuid1()),
  "method" : "bayes",
  "metric" : {
      "name" : "val_accuracy",
      "goal" : "maximize"
  },
  "parameters" : {
    "epochs" : {
      "values" : [20,30,40,50,60]
    },
    "learning_rate" :{
      "values" : [1e-2,1e-3,1e-4]
    },
    "kernel_sizes":{
        "values" : [[(3,3),(3,3),(3,3),(3,3)],
                    [(3,3),(9,9),(10,10),(12,12)],
                    [(3,3),(3,3),(5,5),(7,7)],
                    [(11,11),(11,11),(7,7),(5,5)],
                    [(5,5),(3,3),(7,7),(9,9)],
                    [(5,5),(5,5),(5,5),(5,5)]]
    },
    "filters_list":{
        "values" : [[32,32,32,32],[256,128,64,32],[32,64,64,128],[128,256,512,64],[64,32,64,32]]
    },
    "weight_decay":{
      "values": [0,0.0005,0.005,0.05]  
    },
    "data_augment":{
        "values": ["True","False"]
    },
    "batch_size":{
        "values":[16,32,64]
    },
    "activation":{
        "values": ["relu","elu","swish","gelu"]
    },
      "dropout_dense":{
          "values":[0.0,0.4,0.6]
      },
      "dropout_conv":{
          "values":[0.0,0.2,0.3,0.4]
      },
      "dense_layers":{
          "values":[[32,64],[64,32],[256,256],[16,64],[512,256]]
      },
      "batch_normalization":{
          "values":["True","False"]
      }
  }
}
sweep_id=wandb.sweep(sweep_config,entity="ipda526",project="baseline-drowsiness-detection")
wandb.agent(sweep_id, function=train, count=10) #10 runs

Create sweep with ID: cu69pkll
Sweep URL: https://wandb.ai/ipda526/baseline-drowsiness-detection/sweeps/cu69pkll


[34m[1mwandb[0m: Agent Starting Run: xc3l8g28 with config:
[34m[1mwandb[0m: 	activation: elu
[34m[1mwandb[0m: 	batch_normalization: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dense_layers: [16, 64]
[34m[1mwandb[0m: 	dropout_conv: 0
[34m[1mwandb[0m: 	dropout_dense: 0
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	filters_list: [128, 256, 512, 64]
[34m[1mwandb[0m: 	kernel_sizes: [[5, 5], [3, 3], [7, 7], [9, 9]]
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Currently logged in as: [33mmayukhdas04[0m ([33mipda526[0m). Use [1m`wandb login --relogin`[0m to force relogin


Copying fold 1 to train...
Copying fold 2 to train...
Copying fold 3 to train...
Copying fold 4 to val...
Copying fold 5 to train...
Done!
Found 7950 images belonging to 3 classes.
Found 2196 images belonging to 3 classes.




Epoch 1/60


  output, from_logits, "Softmax", "sparse_categorical_crossentropy"




[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230426_115415-xc3l8g28/files/model-best)... Done. 0.6s



Epoch 1: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.7s


Epoch 2/60
Epoch 2: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.6s


Epoch 3/60

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230426_115415-xc3l8g28/files/model-best)... Done. 0.6s



Epoch 3: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.6s


Epoch 4/60
Epoch 4: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.5s


Epoch 5/60
Epoch 5: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.6s


Epoch 6/60
Epoch 6: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.6s


Epoch 7/60
Epoch 7: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.5s


Epoch 8/60
Epoch 8: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.6s


Epoch 9/60
  4/249 [..............................] - ETA: 3:00 - loss: 9.1260e-05 - accuracy: 1.0000

In [None]:
'''This section is used for loading the models saved with datetime when checkpointing is True'''
# #This can be used when checkpointing is set to True and models are saved in model directory with proper name in the current working directory
# model_dir = 'models_<unique id>' #model director name goes here
# new_model = tf.keras.models.load_model(model_dir)
# # Check its architecture
# new_model.summary()