<a href="https://colab.research.google.com/github/abisubramanya27/CS6910_Assignment2/blob/main/partA/src/Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive inside the Colab VM; the dataset is read from there.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
%cd gdrive/MyDrive/assignments/cs6910/A2/Data
!pwd

/content/gdrive/.shortcut-targets-by-id/1H4LUGHYi_ivI7p5xWjyrjFojgIGEh9Jd/cs6910/A2/Data
/content/gdrive/.shortcut-targets-by-id/1H4LUGHYi_ivI7p5xWjyrjFojgIGEh9Jd/cs6910/A2/Data


In [4]:
# !pip install split-folders

In [5]:
# import splitfolders

# # Splitting the training data into training and validation set
# splitfolders.ratio('./inaturalist_12K/train', output='./inaturalist_12K/output', seed=1337, ratio=(.9, .1), group_prefix=None)

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

# Enable on-demand GPU memory allocation for every detected GPU.
# Iterating (instead of indexing [0]) keeps the cell from raising IndexError
# on a CPU-only runtime and keeps the setting consistent across all GPUs.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

Num GPUs Available:  1


In [7]:
def build_model_partA(inp_img_shape, K_list, F_list, no_neurons_dense, no_classes = 10, activation_fn_list = ['relu']*6, 
                      P_list = ['valid']*10, S_list = [1]*10, BN_yes = False, dropout_p = 0):
    '''
    Assemble the Part-A CNN: 5 x (conv -> [BN] -> activation -> max-pool), then
    flatten -> dropout -> dense -> [BN] -> activation -> dense -> [BN] -> softmax.

    The per-layer lists are indexed by "non FC layer" position: even index 2*i
    configures the i-th convolution, odd index 2*i+1 configures the max-pooling
    that follows it.

    Arguments :
        inp_img_shape -- shape of the input image, passed as input_shape to the first convolution
        K_list -- number of filters per non FC layer (only the even, convolution, entries are read)
        F_list -- square kernel size (even entries) / square pool size (odd entries)
        no_neurons_dense -- number of units in the hidden dense layer
        no_classes -- number of units in the softmax output layer
        activation_fn_list -- activations: entries 0..4 for the convolutions, entry 5 for the dense layer
        P_list -- padding mode ('valid' or 'same') per non FC layer
        S_list -- stride (same in both spatial dimensions) per non FC layer
        BN_yes -- when True, a BatchNormalization layer is inserted after every
                  convolution, after the hidden dense layer and after the output dense layer
        dropout_p -- dropout rate applied to the flattened features feeding the hidden dense layer

    Returns :
        model -- the keras Sequential model (not compiled)
    '''
    def square(side):
        # Kernel, pool and stride sizes are the same along width and height.
        return (side, side)

    def add_batch_norm_if_enabled(net):
        if BN_yes:
            net.add(BatchNormalization())

    cnn = Sequential()

    # 5 conv -> activation -> max-pool stages; only the first one declares the input shape
    for stage in range(5):
        conv_idx = 2 * stage
        pool_idx = conv_idx + 1

        conv_options = dict(filters = K_list[conv_idx],
                            kernel_size = square(F_list[conv_idx]),
                            strides = square(S_list[conv_idx]),
                            padding = P_list[conv_idx])
        if stage == 0:
            conv_options['input_shape'] = inp_img_shape

        cnn.add(Conv2D(**conv_options))
        add_batch_norm_if_enabled(cnn)
        cnn.add(Activation(activation_fn_list[stage]))
        cnn.add(MaxPooling2D(pool_size = square(F_list[pool_idx]),
                             strides = square(S_list[pool_idx]),
                             padding = P_list[pool_idx]))

    # Hidden dense layer, fed by the (dropped-out) flattened feature maps
    cnn.add(Flatten())
    cnn.add(Dropout(dropout_p))
    cnn.add(Dense(units = no_neurons_dense))
    add_batch_norm_if_enabled(cnn)
    cnn.add(Activation(activation = activation_fn_list[5]))

    # Softmax classifier
    cnn.add(Dense(units = no_classes))
    add_batch_norm_if_enabled(cnn)
    cnn.add(Activation(activation = 'softmax'))

    return cnn
    

In [8]:
!pip install --upgrade wandb
!wandb login 6746f968d95eb71e281d6c7772a0469574430408

Requirement already up-to-date: wandb in /usr/local/lib/python3.7/dist-packages (0.10.23)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [16]:
def data_generator(train_data_path, inp_img_shape, batch_size, data_augment_yes = False, val_data_path = None, test_data_path = None):
    '''
    Create directory-backed image generators for the training, validation and test sets.

    Every generator rescales pixel values by 1/255, resizes images to the
    spatial part of inp_img_shape (all but its last entry) and yields
    categorical labels in batches of batch_size.

    Arguments :
        train_data_path -- directory of the training images
        inp_img_shape -- input image shape; its last entry is dropped to get the target size
        batch_size -- number of images per batch
        data_augment_yes -- truthy : random rotation / shift / horizontal flip is applied
                            to the training images only
        val_data_path -- directory of the validation images (None : no validation generator)
        test_data_path -- directory of the test images (None : no test generator)

    Returns :
        (train_generator, val_generator, test_generator) -- the last two are None
        when the corresponding path is None
    '''
    # Options shared by every flow_from_directory call
    flow_options = dict(target_size = inp_img_shape[:-1],
                        batch_size = batch_size,
                        class_mode = 'categorical')

    # Augmentation settings handed to ImageDataGenerator (training set only)
    if data_augment_yes:
        augment_options = {
            'rotation_range': 30,
            'height_shift_range': 0.15,
            'width_shift_range': 0.15,
            #'channel_shift_range': 10,
            #'shear_range': 0.15,
            #'zoom_range': 0.2,
            'horizontal_flip': True
        }
    else:
        augment_options = {}

    def plain_flow(data_path):
        # Un-augmented generator for an optional evaluation split
        if data_path is None:
            return None
        return ImageDataGenerator(rescale = 1./255).flow_from_directory(data_path, **flow_options)

    train_generator = ImageDataGenerator(rescale = 1./255, **augment_options).flow_from_directory(train_data_path, **flow_options)
    val_generator = plain_flow(val_data_path)
    test_generator = plain_flow(test_data_path)

    return train_generator, val_generator, test_generator


In [18]:
import wandb
from wandb.keras import WandbCallback

# Training configuration of the sample run; also logged as the W&B run config
config_1 = dict(
    learning_rate = 5e-3,
    epochs = 10,
    batch_size = 64,
    loss_function = 'categorical_crossentropy',
    architecture = 'CNN',
    dataset = "iNaturalist_12K",
)

In [19]:
import math

def train_model(model, train_size, train_gen, config, data_augment_yes = False, val_gen = None):
    '''
    Compile the model with Adam and fit it on the training generator, logging to W&B.

    Arguments :
        model -- keras model to train
        train_size -- number of training samples, used to derive the steps per epoch
        train_gen -- generator yielding the training batches
        config -- dict providing 'learning_rate', 'loss_function', 'batch_size' and 'epochs'
        data_augment_yes -- True : extra steps worth 1024 samples are added to every epoch
        val_gen -- optional generator used as validation data

    Returns :
        model -- the same model object, after fitting
    '''
    optimizer = Adam(learning_rate=config['learning_rate'])
    model.compile(optimizer = optimizer, loss = config['loss_function'], metrics = ['accuracy'])

    batch_size = config['batch_size']
    # One full pass over the training samples ...
    n_steps = math.ceil(1.0 * train_size / batch_size)
    # ... plus 1024 extra samples per epoch when augmentation is on
    extra_steps = math.ceil(1024.0 / batch_size)
    if data_augment_yes == True:
        n_steps = n_steps + extra_steps

    fit_options = dict(epochs = config['epochs'],
                       steps_per_epoch = n_steps,
                       validation_data = val_gen,
                       verbose = 2,
                       callbacks = [WandbCallback()])
    model.fit(train_gen, **fit_options)

    return model

In [None]:
# Running sample run

def CNN(inp_img_shape, train_data_path, K_list, F_list, no_neurons_dense, config, no_classes = 10, activation_fn_list = ['relu']*6, 
        P_list = ['valid']*10, S_list = [1]*10, BN_yes = False, dropout_p = 0, val_data_path = None, test_data_path = None, 
        data_augment_yes = False):
    '''
    Build, train and return the Part-A CNN inside a single W&B run.

    Arguments :
        inp_img_shape -- shape of the input image
        train_data_path -- directory of the training images
        K_list, F_list, no_neurons_dense, no_classes, activation_fn_list,
        P_list, S_list, BN_yes, dropout_p -- architecture hyperparameters,
            forwarded unchanged to build_model_partA
        config -- training configuration; logged as the W&B run config, its
                  'batch_size' is used for the data generators and the whole
                  dict is forwarded to train_model
        val_data_path -- directory of the validation images (None : no validation)
        test_data_path -- directory of the test images; a generator is created
                          for it but it is not used by this function
        data_augment_yes -- True : augment the training images

    Returns :
        model -- the trained keras model

    Any exception raised while building or training propagates to the caller,
    after the W&B run has been closed.
    '''
    run = wandb.init(project="assignment2", entity="abisheks", reinit=True, config=config)
    try:
        # Drop graph state left over from previous runs in the same kernel
        tf.keras.backend.clear_session()

        model = build_model_partA(inp_img_shape, K_list, F_list, no_neurons_dense, no_classes, activation_fn_list, P_list, S_list, BN_yes, dropout_p)
        # model.summary()
        train_gen, val_gen, _test_gen = data_generator(train_data_path, inp_img_shape, config['batch_size'], data_augment_yes, 
                                                       val_data_path, test_data_path)
        train_size = train_gen.samples
        model = train_model(model, train_size, train_gen, config, data_augment_yes, val_gen)
    finally:
        # Always close the run, even when building/training fails, so a crashed
        # experiment does not leave a dangling W&B run behind.
        run.finish()

    return model

# Hyperparameters of the Part-A model.
# The per-layer lists hold one entry per non FC layer, alternating conv / max-pool:
# even positions configure a convolution, odd positions the pooling that follows it.
inp_img_shape_1 = (227, 227, 3)        # Shape of input image from data
no_classes_1 = 10                      # Number of output classes in the classification problem

# Number of filters in each non FC layer (the pooling entries are not read by build_model_partA)
K_list_1 = [32, 32, 32, 32, 64, 64, 64, 64, 128, 128]
# Size of filters (conv) / pooling window (max-pool) in each non FC layer
F_list_1 = [11, 3, 5, 3, 3, 3, 3, 3, 3, 3]
# Stride in each non FC layer
S_list_1 = [4, 2, 1, 1, 1, 1, 1, 2, 1, 1]
# Padding in each non FC layer ('valid' : no padding, 'same' : padding to make input and output same dimensions)
P_list_1 = ['valid'] * 10
# Activation function of each convolution (first 5 entries) and of the dense FC layer (last entry)
activation_fn_list_1 = ['relu'] * 6

no_neurons_dense_1 = 128               # Number of neurons in the dense FC layer
BN_1 = True                            # True : Batch normalisation (BN) should be used, False : BN should not be used
dropout_p_1 = 0.3                      # Probability of dropping out a neuron


# PART-A, Question 1 -- Building a model with (5 conv+relu+maxpooling layers + 1 dense FC layer) for image classification objective
modelA = CNN(
    inp_img_shape = inp_img_shape_1,
    train_data_path = './inaturalist_12K/train',
    K_list = K_list_1,
    F_list = F_list_1,
    no_neurons_dense = no_neurons_dense_1,
    config = config_1,
    no_classes = no_classes_1,
    activation_fn_list = activation_fn_list_1,
    P_list = P_list_1,
    S_list = S_list_1,
    BN_yes = BN_1,
    dropout_p = dropout_p_1,
    val_data_path = './inaturalist_12K/val',
    test_data_path = './inaturalist_12K/test',
    data_augment_yes = False,
)

Found 9006 images belonging to 10 classes.
Found 1004 images belonging to 10 classes.
Found 2008 images belonging to 10 classes.
Epoch 1/10
141/141 - 115s - loss: 2.1962 - accuracy: 0.2142 - val_loss: 5.0933 - val_accuracy: 0.1643
Epoch 2/10
141/141 - 113s - loss: 2.0381 - accuracy: 0.2759 - val_loss: 2.4603 - val_accuracy: 0.2181
Epoch 3/10
141/141 - 114s - loss: 1.9997 - accuracy: 0.2867 - val_loss: 2.8381 - val_accuracy: 0.1594
Epoch 4/10
141/141 - 114s - loss: 1.9449 - accuracy: 0.3067 - val_loss: 2.3630 - val_accuracy: 0.1932
Epoch 5/10


In [None]:
!find "./inaturalis" -size 0 -print