<a href="https://colab.research.google.com/github/mak109/cs6910_assignment2/blob/main/PART%20A/cs6910_assignment2_partA_question1_2_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training CNN from Scratch

## 1. Packages and imports

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import random
import datetime
import os
from zipfile import ZipFile
from PIL import Image
plt.rcParams["figure.figsize"] = (20,10)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,regularizers,optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
random.seed(123)

## 2. UTA-RLDD preprocessed dataset downloaded from kaggle

In [1]:
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json
api_token = {"username":"","key":""} #Place your kaggle credentials here

import json

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)

!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mak1999/uta-rldd

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading uta-rldd.zip to /kaggle/working
100%|██████████████████████████████████████| 2.40G/2.40G [01:41<00:00, 33.6MB/s]
100%|██████████████████████████████████████| 2.40G/2.40G [01:41<00:00, 25.5MB/s]


In [3]:
filename = 'uta-rldd.zip'
with ZipFile(filename, 'r') as z:
    print('Extracting all the files now...')
    z.extractall()
    print('Done!')
os.remove(filename)

Extracting all the files now...
Done!


## 3. Wandb setup for hyperparameter tuning

In [5]:
import wandb
os.environ['WANDB_ENTITY'] = 'ipda526'
os.environ['WANDB_PROJECT'] = 'baseline-drowsiness-detection'
wandb.login(key='') #Place your wandb api key here
from wandb.keras import WandbCallback,WandbMetricsLogger, WandbModelCheckpoint

[34m[1mwandb[0m: Currently logged in as: [33mdebarpanjana213[0m ([33mipda526[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [6]:
# tf.debugging.set_log_device_placement(True)
tf.config.set_soft_device_placement(True)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
    try:
        tf.config.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPU


## 4. Model definition and training

In [7]:
image_size = (256,256)
num_layers = 4 #Number of convolution layers
num_classes = 3 #0 - awake 1-drowsy 2 - low vigilant
train_dir = 'uta-rldd/train'

In [8]:
def CNN(config):
    model = Sequential([
        layers.Input((image_size[0],image_size[1],3)),
        layers.Rescaling(1./255)
        ])
    
    for l in range(num_layers):
        model.add(layers.Conv2D(filters=config["filters_list"][l],kernel_size=(config["kernel_sizes"][l][0],config["kernel_sizes"][l][1]),
                        activation=config["activation"],padding="same",kernel_regularizer=regularizers.l2(config["weight_decay"])))
        if config["batch_normalization"] == 'True':
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D())
        if(l<3):model.add(layers.Dropout(config['dropout']))
            

    model.add(layers.Flatten())
    model.add(layers.Dense(config["dense_layer_size"],activation=config["activation"],kernel_regularizer=regularizers.l2(config["weight_decay"])))
    model.add(layers.Dropout(config['dropout']))
    model.add(layers.Dense(config["dense_layer_size"]/2,activation=config["activation"],kernel_regularizer=regularizers.l2(config["weight_decay"])))

    model.add(layers.Dense(num_classes,activation="softmax"))
    return model

In [9]:
#Training goes here
#Comment out the code related to Wandb if training is done without wandb integration
def train(config_in=None,checkpointing=False):
    #default configuration
    config_ = {
    "kernel_sizes" : [(3,3),(5,5),(7,7),(9,9)],
    "activation" : 'relu',
    "learning_rate": 1e-3,
    "filters_list" : [16,32,64,128],
    "dense_layer_size" : 128,
    "batch_normalization": "True",
    "data_augment": "False",
    "weight_decay":0.0005,
    "dropout":0.2,
    "batch_size":64,
    "epochs":5
    }
    if config_in is not None:
          config = config_in
    else:
          config = config_ #Default Config

    '''Wandb Configs'''
    wandb.init(config=config_)
    config = wandb.config
    #Setting run name for better readability
    wandb.run.name = "nd_"+str(config["dense_layer_size"])+"bs_"+str(config["batch_size"])+"ac_"+str(config["activation"])
    #Some data preprocessing and train,val splitting
    
    #Data Augmentation This can also be validated for better results
    if config["data_augment"] == 'True':
        data_generator = ImageDataGenerator(
        rotation_range=50, #random rotation between -50(clockwise) to 50(anti-clockwise) degree
        brightness_range=(0.2,0.8), 
        zoom_range=0.3, #zoom in range from [0.7,1.3]
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.1, #Horizontal Shifting as a ratio of width
        height_shift_range=0.2,#Vertical Shifting as a ratio of height
        data_format='channels_last',
        validation_split=0.1
#         dtype=tf.float32
        )
    else:
        data_generator = ImageDataGenerator(
            data_format='channels_last',
            validation_split=0.1
#             dtype=tf.float32
        )
    #Train set creation after conditional augmentation
    train_generator = data_generator.flow_from_directory(
    train_dir,
    target_size = image_size,
    batch_size = config['batch_size'],
    color_mode = 'rgb',
    class_mode = 'sparse',
    shuffle=True,
    subset='training',
    seed=123
    )
    val_generator = ImageDataGenerator(validation_split=0.1,data_format='channels_last').flow_from_directory(
        train_dir,
        target_size = image_size,
        batch_size = config['batch_size'],
        color_mode = 'rgb',
        class_mode = 'sparse',
        shuffle=True,
        subset='validation',
        seed=123
    
    )
    try:
        with tf.device('/device:GPU:0'):
            #Building Model based on config 
            model = CNN(config)

            #Compiling model 
            model.compile(
            optimizer=optimizers.Adam(learning_rate=config["learning_rate"]),
            loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy']
            )
            #For checkpointing default value is False
            if checkpointing == True:
                current_directory = os.getcwd()
                final_directory = os.path.join(current_directory, f'models_{datetime.datetime.now()}')
                if not os.path.exists(final_directory):
                    os.makedirs(final_directory)
                checkpoint_filepath = final_directory
                model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                  filepath=checkpoint_filepath,
                  save_weights_only=False,
                  monitor='val_accuracy',
                  mode='max',
                  save_best_only=True)
                  #Fitting Model
                history = model.fit(train_generator,
                  validation_data=val_generator,
                  epochs=config["epochs"],
                  verbose=1,
                  # callbacks = [WandbCallback()] #Used with wandb
                  callbacks = [model_checkpoint_callback] #Custom callback for checkpointing
                  )
            else:
                history = model.fit(train_generator,
                  validation_data=val_generator,
                  epochs=config["epochs"],
                  verbose=1,#WandbMetricsLogger(log_freq="epoch"),
                  callbacks = [WandbCallback(monitor='val_accuracy',mode='auto'),
                               WandbModelCheckpoint(filepath="models",monitor='val_accuracy',verbose=1,save_freq='epoch',mode='max')] #Used with wandb
                  )
            wandb.finish()
    except RuntimeError as e:
          print(e)
    return history

## Standalone training

In [None]:
history = train()
#Visualization part
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
# plt.savefig('metrics.jpg')
plt.show()

In [10]:
import uuid
'''Wandb Sweeps '''
#Sweep configuration for runs
sweep_config = {
  "name" : "best-sweep-kaggle"+str(uuid.uuid1()),
  "method" : "bayes",
  "metric" : {
      "name" : "val_accuracy",
      "goal" : "maximize"
  },
  "parameters" : {
    "epochs" : {
      "values" : [10,20,30]
    },
    "learning_rate" :{
      "values" : [1e-3,1e-4]
    },
    "kernel_sizes":{
        "values" : [[(3,3),(3,3),(3,3),(3,3)],
                    [(3,3),(3,3),(5,5),(7,7)],
                    [(11,11),(11,11),(7,7),(5,5)],
                    [(3,3),(5,5),(7,7),(9,9)],
                    [(5,5),(5,5),(5,5),(5,5)]]
    },
    "filters_list":{
        "values" : [[32,32,32,32],[256,128,64,32],[32,64,64,128],[32,64,128,256],[64,32,64,32]]
    },
    "weight_decay":{
      "values": [0,0.0005,0.005]  
    },
    "data_augment":{
        "values": ["True","False"]
    },
    "batch_size":{
        "values":[32,64]
    },
    "activation":{
        "values": ["relu","elu","swish","gelu"]
    },
      "dropout":{
          "values":[0.0,0.2,0.4]
      },
      "dense_layer_size":{
          "values":[64,128,256,512]
      },
      "batch_normalization":{
          "values":["True","False"]
      }
  }
}
sweep_id=wandb.sweep(sweep_config,entity="ipda526",project="baseline-drowsiness-detection")
wandb.agent(sweep_id, function=train, count=10)

Create sweep with ID: 814fswy2
Sweep URL: https://wandb.ai/ipda526/baseline-drowsiness-detection/sweeps/814fswy2


[34m[1mwandb[0m: Agent Starting Run: xvsgsoxm with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_normalization: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dense_layer_size: 64
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	filters_list: [32, 64, 128, 256]
[34m[1mwandb[0m: 	kernel_sizes: [[5, 5], [5, 5], [5, 5], [5, 5]]
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	weight_decay: 0.0005


Found 9132 images belonging to 3 classes.
Found 1014 images belonging to 3 classes.




Epoch 1/30


  output, from_logits, "Softmax", "sparse_categorical_crossentropy"
2023-04-28 20:04:56.311521: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer




[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.4s



Epoch 1: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 2/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 2: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.4s


Epoch 3/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 3: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 4/30
Epoch 4: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 5/30
Epoch 5: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 6/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 6: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.4s


Epoch 7/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 7: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 8/30
Epoch 8: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 9/30
Epoch 9: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 10/30
Epoch 10: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.4s


Epoch 11/30
Epoch 11: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 12/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 12: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 13/30

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/wandb/run-20230428_200418-xvsgsoxm/files/model-best)... Done. 0.3s



Epoch 13: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.4s


Epoch 14/30
Epoch 14: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 15/30
Epoch 15: saving model to models


[34m[1mwandb[0m: Adding directory to artifact (./models)... Done. 0.3s


Epoch 16/30
 10/143 [=>............................] - ETA: 2:58 - loss: 0.3851 - accuracy: 0.9547

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


VBox(children=(Label(value='1400.951 MB of 1400.951 MB uploaded (0.794 MB deduped)\r'), FloatProgress(value=1.…

0,1
accuracy,▁▅▆▆▇▇▇▇▇▇▇████
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▃▂▂▂▂▁▂▂▂▂▂▁▁▁
val_accuracy,▂▂▅▂▃▅▇▂▁▅▄██▃▁
val_loss,▆▆▅▄▃▄▂▇▄▂▄▂▁▂█

0,1
accuracy,0.97317
best_epoch,12.0
best_val_accuracy,0.63708
epoch,14.0
loss,0.31139
val_accuracy,0.34911
val_loss,12.36236


In [None]:
'''This section is used for loading the models saved with datetime when checkpointing is True'''
# #This can be used when checkpointing is set to True and models are saved in model directory with proper name in the current working directory
# model_dir = 'models_2022-04-03 00:00:29.823768' #model director name goes here
# new_model = tf.keras.models.load_model(model_dir)
# # Check its architecture
# new_model.summary()