# Part A

This section contains implementation specifics of building a CNN based image classifier using the iNaturalist dataset.

The Architecture:
1.   Five convolution layers, each followed by a ReLU activation and a max pooling layer.
2.   One dense layer 
3.   One output layer containing 10 neurons (1 for each of the 10 classes). 

Import essential libraries

In [67]:
# Essentials
import numpy as np
import tensorflow
from tensorflow import keras
from keras import regularizers
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.layers import Dense, Flatten, Conv2D, BatchNormalization, Dropout, MaxPooling2D, Activation
from keras.optimizers import Adam
from keras import callbacks
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, Callback, EarlyStopping
import random
import imageio
import os
import cv2
import glob
random.seed(42)

In [16]:
# WandB – Install the W&B library
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

Fetch dataset from GitHub

In [17]:
# Fetch the dataset from GitHub
!git clone https://github.com/borate267/inaturalist-dataset.git

fatal: destination path 'inaturalist-dataset' already exists and is not an empty directory.


Read the training and validation images

In [6]:
# Map each integer label to the iNaturalist class folder it corresponds to
class_names = {
    0: 'Amphibia',
    1: 'Animalia',
    2: 'Arachnida',
    3: 'Aves',
    4: 'Fungi',
    5: 'Insecta',
    6: 'Mammalia',
    7: 'Mollusca',
    8: 'Plantae',
    9: 'Reptilia',
}
num_classes = 10
img_size = 128
dir = 'inaturalist-dataset/train'

import random

# Read every training image class by class, resizing each to img_size x img_size
X_train = []
y_train = []
for label, name in class_names.items():
    list_images = os.listdir(f"{dir}/{name}")
    for image_name in list_images:
        image = imageio.imread(f"{dir}/{name}/{image_name}")
        # Only arrays with a channel axis are kept; anything else is skipped
        if np.ndim(image) != 3:
            continue
        X_train.append(cv2.resize(image, (img_size, img_size)))
        y_train.append(label)


Shuffle the images and then retain 10% as validation data

In [7]:
# Shuffle the images with a fixed seed and hold out a fraction of them for validation.
VAL_FRACTION = 0.1   # share of the loaded training images kept for validation
SPLIT_SEED = 42      # fixed seed so the split is identical on every fresh run

X_all = np.array(X_train)
y_all = np.array(y_train)

# This cell overwrites X_train / y_train. Re-running it without re-running the
# loading cell would normalise and one-hot encode already processed data, so
# fail loudly instead of silently corrupting the arrays.
assert y_all.ndim == 1, "y_train is already one-hot encoded; re-run the image loading cell first"

num_images = X_all.shape[0]
num_val = int(round(VAL_FRACTION * num_images))
num_train = num_images - num_val

# A local generator keeps the split reproducible without touching NumPy's global RNG state
order = np.random.default_rng(SPLIT_SEED).permutation(num_images)
train_idx = order[:num_train]
val_idx = order[num_train:]

# Normalize the pixel values to [0, 1]; float32 matches the dtype used at prediction time
X_train = X_all[train_idx].astype('float32') / 255.0
X_val = X_all[val_idx].astype('float32') / 255.0

# One hot encode the labels
y_train = np_utils.to_categorical(y_all[train_idx], num_classes)
y_val = np_utils.to_categorical(y_all[val_idx], num_classes)


Configure the sweep hyperparameter dictionary

In [8]:
# Search space for the W&B hyperparameter sweep.
# The Bayesian search maximises the *validation* accuracy logged by the Keras
# callback; optimising the training accuracy would favour overfitted models.
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        # One kernel size per convolution layer (five layers)
        'kernel_size': {
            'values': [
                [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3)],
                [(3, 3), (5, 5), (5, 5), (7, 7), (7, 7)],
                [(7, 7), (7, 7), (5, 5), (5, 5), (3, 3)],
                [(3, 3), (5, 5), (7, 7), (9, 9), (11, 11)],
            ]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.005]
        },
        'dropout': {
            'values': [0, 0.2, 0.4]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'activation': {
            'values': ['relu', 'elu', 'selu']
        },
        # Boolean switches are encoded as the strings 'true' / 'false'
        'batch_norm': {
            'values': ['true', 'false']
        },
        # How the filter count evolves from layer to layer, starting at conv_layer_size
        'filt_org': {
            'values': ['standard', 'double', 'half']
        },
        # Number of filters in the first convolution layer
        'conv_layer_size': {
            'values': [16, 32]
        },
        'data_augment': {
            'values': ['true', 'false']
        },
        'batch_size': {
            'values': [32, 64]
        },
        # Number of neurons in the dense layer
        'num_dense': {
            'values': [64, 128, 256, 512]
        }
    }
}

 Initialize the Sweep

In [88]:
# Initialize a new sweep
# Registers sweep_config under the given W&B entity/project and keeps the
# returned sweep ID in sweep_id.
sweep_id = wandb.sweep(sweep_config, entity="bharatik", project="cs6910assignment2")



Create sweep with ID: iieb3ba8
Sweep URL: https://wandb.ai/bharatik/cs6910assignment2/sweeps/iieb3ba8


In [95]:
def _is_enabled(flag):
    """Interpret a sweep switch ('true'/'false', any casing, or a real bool) as a boolean."""
    return str(flag).lower() == 'true'


def _conv_filter_counts(base, organisation, num_layers=5):
    """Return the integer number of filters for each convolution layer.

    base:         filter count of the first layer.
    organisation: 'standard' (same count everywhere), 'double' (doubles every
                  layer) or 'half' (halves every layer, never dropping below 1).
    Raises ValueError for an unknown organisation.
    """
    if organisation == 'standard':
        return [base] * num_layers
    if organisation == 'double':
        return [base * (2 ** i) for i in range(num_layers)]
    if organisation == 'half':
        # Integer division keeps the counts integral (16 -> 16, 8, 4, 2, 1)
        return [max(1, base // (2 ** i)) for i in range(num_layers)]
    raise ValueError("Unknown filt_org: {!r}".format(organisation))


def _build_cnn(config, input_shape):
    """Assemble the five-block CNN followed by one dense layer and the softmax output."""
    filter_counts = _conv_filter_counts(config.conv_layer_size, config.filt_org)
    use_batch_norm = _is_enabled(config.batch_norm)
    last_block = len(filter_counts) - 1

    model = Sequential()
    for idx, (n_filters, kernel) in enumerate(zip(filter_counts, config.kernel_size)):
        # Only the first layer of a Sequential model needs to know the input shape
        first_layer_kwargs = {'input_shape': input_shape} if idx == 0 else {}
        model.add(Conv2D(filters=n_filters,
                         kernel_size=tuple(kernel),  # sweep values may arrive as lists
                         padding='same',
                         kernel_regularizer=regularizers.l2(config.weight_decay),
                         **first_layer_kwargs))
        model.add(Activation(config.activation))
        if use_batch_norm:
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # No dropout after the last convolution block, which feeds the dense layer directly
        if idx < last_block:
            model.add(Dropout(config.dropout))

    # FC layer
    model.add(Flatten())
    model.add(Dense(config.num_dense, activation=config.activation,
                    kernel_regularizer=regularizers.l2(config.weight_decay)))
    model.add(BatchNormalization())

    # Output layer
    model.add(Dense(num_classes, activation="softmax"))
    return model


def train():
    """Run one W&B sweep trial: build the CNN from the run's config and train it.

    Reads the notebook-level arrays X_train, y_train, X_val, y_val and the
    constants img_size and num_classes. Hyperparameters come from wandb.config,
    falling back to config_defaults for anything the sweep does not set.
    Metrics are logged to W&B through WandbCallback. Returns None.
    """
    config_defaults = {
        'kernel_size': [(3,3),(3,3),(3,3),(3,3),(3,3)],
        'weight_decay': 0.005,
        'dropout': 0.2,
        'learning_rate': 1e-3,
        'activation': 'relu',
        'batch_size': 64,
        'epochs': 10,
        'batch_norm': 'true',
        'filt_org' : 'standard',
        'conv_layer_size' : 16,
        'data_augment': 'true',
        'num_dense': 256,
        'seed': 42,
        'num_classes': 10
    }

    # Initialize a new wandb run
    wandb.init(config=config_defaults)

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    wandb.run.name = 'num_dense_'+ str(config.num_dense)+'_bs_'+str(config.batch_size)+'_ac_'+ config.activation

    # Define the model architecture
    model = _build_cnn(config, input_shape=(img_size, img_size, 3))

    # Use the swept learning rate; passing the string 'adam' would silently ignore it
    optimizer = Adam(learning_rate=config.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

    # patience matches the default number of epochs, so early stopping only
    # comes into play when a run is configured with more epochs
    run_callbacks = [WandbCallback(),
                     keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

    if _is_enabled(config.data_augment):
        # Geometric augmentation only: the validation images are fed as plain
        # arrays, so no mean-centering is applied to the training stream either.
        datagen = ImageDataGenerator(
            rotation_range=45,        # randomly rotate images (degrees)
            width_shift_range=0.2,    # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,   # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,     # randomly flip images horizontally
        )
        model.fit(
            datagen.flow(X_train, y_train, batch_size=config.batch_size, seed=config.seed),
            epochs=config.epochs,
            verbose=1,
            validation_data=(X_val, y_val),
            callbacks=run_callbacks,
        )
    else:
        model.fit(
            x=X_train,
            y=y_train,
            batch_size=config.batch_size,
            epochs=config.epochs,
            verbose=1,
            validation_data=(X_val, y_val),
            callbacks=run_callbacks,
        )
    

    
    

Run the sweep agent (each invocation executes at most 50 runs; re-run the cell to reach 100 runs or more)

In [None]:
# Launch a sweep agent
# Arguments:
#     – sweep_id: the sweep_id to run - this was returned above by wandb.sweep()
#     – function: function that defines your model architecture and trains it
#     – entity / project: same W&B workspace the sweep was created in
#     – count: maximum number of runs this agent executes
wandb.agent(sweep_id, train, entity="bharatik", project="cs6910assignment2", count = 50)

Testing ground

In [43]:
 # Load testing dataset

dir = "inaturalist-dataset/val"

# Read every image of the held-out split class by class, resizing each to img_size x img_size
X_test = []
y_test = []
for label, name in class_names.items():
    list_images = os.listdir(f"{dir}/{name}")
    for image_name in list_images:
        image = imageio.imread(f"{dir}/{name}/{image_name}")
        # Only arrays with a channel axis are kept; anything else is skipped
        if np.ndim(image) != 3:
            continue
        X_test.append(cv2.resize(image, (img_size, img_size)))
        y_test.append(label)



Shuffle and pre-process the dataset

In [44]:
# Draw a random ordering of the test images
leng = np.shape(X_test)
arr = np.arange(leng[0])
np.random.shuffle(arr)

# Reorder images and labels with the same permutation so they stay aligned
X_test_shuf = [X_test[pos] for pos in arr]
y_test_shuf = [y_test[pos] for pos in arr]

# Stack into arrays and normalize the pixel values
X_test = np.array(X_test_shuf) / 255.0

# One hot encode the labels
y_test = np_utils.to_categorical(np.array(y_test_shuf), num_classes)


Testing accuracy and losses are computed by calling the following function using the best set of hyperparameters obtained by sweeping over 148 runs

In [91]:
def test():
    """Train the CNN with the best sweep hyperparameters and evaluate it on the test set.

    Reads the notebook-level arrays X_train, y_train, X_test, y_test and the
    constants img_size and num_classes. Prints the final test loss and accuracy.

    Returns:
        The trained Keras model, so later cells can use it for predictions.
    """
    # BEST HYPERPARAMETERS AFTER 148 SWEEPS
    # (batch_norm = 'true', filt_org = 'standard' and data_augment = 'false' are
    # part of the best configuration too; they are reflected directly in the
    # architecture and training call below.)
    best_kernel_size = [(3,3),(5,5),(5,5),(7,7),(7,7)]
    best_weight_decay = 0
    best_dropout = 0
    best_learning_rate = 1e-3
    best_activation = 'elu'
    best_batch_size = 64
    best_conv_layer_size = 32
    best_num_dense = 256

    # Determine input shape
    input_shape = (img_size, img_size, 3)

    # Define the model architecture: five identical conv blocks that differ only in kernel size
    model = Sequential()
    for idx, kernel in enumerate(best_kernel_size):
        # Only the first layer of a Sequential model needs to know the input shape
        first_layer_kwargs = {'input_shape': input_shape} if idx == 0 else {}
        model.add(Conv2D(filters=best_conv_layer_size, kernel_size=kernel, padding='same',
                         kernel_regularizer=regularizers.l2(best_weight_decay),
                         **first_layer_kwargs))
        model.add(Activation(best_activation))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(best_dropout))

    # FC layer
    model.add(Flatten())
    model.add(Dense(best_num_dense, activation=best_activation,
                    kernel_regularizer=regularizers.l2(best_weight_decay)))
    model.add(BatchNormalization())

    # Output layer
    model.add(Dense(num_classes, activation="softmax"))

    # Define the optimizer
    optimizer = Adam(learning_rate=best_learning_rate, beta_1=0.9, beta_2=0.999)

    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

    # The test set is passed as validation data only to monitor it per epoch;
    # no callback uses it, so it does not influence training.
    model.fit(
        x=X_train,
        y=y_train,
        batch_size=best_batch_size,
        epochs=10,
        verbose=1,
        validation_data=(X_test, y_test),
    )

    # Report the final test metrics explicitly
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss: {:.4f} - Test accuracy: {:.4f}'.format(test_loss, test_accuracy))

    return model

# Keep the trained model in the notebook namespace for the prediction cells
model = test()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [94]:
wandb.init(project="cs6910assignment2", entity="bharatik")

y_pred = []

# Pick one random image of the first class from the test split
class_ = class_names[0]
list_images = os.listdir('inaturalist-dataset/val/'+ class_)
# randrange excludes its upper bound, so the index is valid for any folder size
n = random.randrange(len(list_images))

# Read in an image from the test dataset
image = imageio.imread('inaturalist-dataset/val/'+ class_+'/'+list_images[n])
# imageio already returns RGB data, so the display copy needs no channel swap
img = np.array(image)

# Resize image and normalize it
pic = cv2.resize(image, (img_size, img_size)).astype('float32') / 255.

# Predictions for the class; `model` must be a trained Keras model available
# in the notebook namespace
prediction = model.predict(pic.reshape(1, img_size, img_size, 3))[0]

# Get true name of the class
name = class_.split('_')[0].title()





VBox(children=(Label(value=' 0.42MB of 0.42MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,574
_timestamp,1618226261
_step,1


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█


In [93]:
# Rank the classes by their raw predicted probability and keep the three most likely
top_three = sorted(enumerate(prediction), key=lambda item: item[1], reverse=True)[:3]

# Format predictions to string to overlay on image
text = ['{:s} : {:.1f}%'.format(class_names[k].split('_')[0].title(), 100*v) for k, v in top_three]

# Upscale image
img = cv2.resize(img, (352, 352))

# Create background to overlay text on
cv2.rectangle(img, (0,260),(215,352),(255,255,255), -1)

# Add text to image
font = cv2.FONT_HERSHEY_DUPLEX
cv2.putText(img, 'True Name : %s' % name, (10, 280), font, 0.7,(73,79,183), 2, cv2.LINE_AA)
for k, t in enumerate(text):
    cv2.putText(img, t, (10, 300+k*18), font, 0.65,(0,0,0), 2, cv2.LINE_AA)

# Add predicted image from test dataset with annotations to array
y_pred.append(wandb.Image(img, caption="Actual: %s" % name))

wandb.log({"predictions": y_pred})