### Section I: Setup

#### Importing libraries required

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os, sys
from glob import glob
from mpl_toolkits.mplot3d import Axes3D
import random
from PIL import Image
from tqdm import tqdm
from tqdm.notebook import tqdm_notebook
import tensorflow
import zipfile
import tensorflow_hub as hub
import sys
np.set_printoptions(threshold=sys.maxsize)

#### Extracting relevant zip files for running models

In [None]:
# Unpack the Task 1 archive into the current working directory.
local_zip = "Task1.zip"
image_zip = zipfile.ZipFile(local_zip, 'r')
try:
    image_zip.extractall()
finally:
    # Always release the archive handle, even if extraction fails.
    image_zip.close()

In [None]:
# Unpack the preprocessed subset archive into the current working directory.
zip_file = "Preprocessed_subset_Task1.zip"
preprocessed_image_zip = zipfile.ZipFile(zip_file, 'r')
try:
    preprocessed_image_zip.extractall()
finally:
    # Always release the archive handle, even if extraction fails.
    preprocessed_image_zip.close()
print("Unzipped files!!")

Unzipped files!!


### Section II: EDA and reference functions

#### Exploratory Data Analysis

In [None]:
#Demo code to read data
def rot(n):
    """
    Convert a rotation vector into the corresponding 3x3 rotation matrix.

    The direction of `n` is the rotation axis and its Euclidean norm is the
    rotation angle in radians (Rodrigues' formula).

    Input:
    * n
    Any array-like holding exactly 3 numbers (integers are accepted).

    Output:
    * R
    (3, 3) float64 rotation matrix. The identity is returned for a zero vector.

    Raises AssertionError if `n` does not contain exactly 3 elements.
    """
    # Work on a private float64 copy: the caller's array is never modified and
    # the in-place normalisation below cannot fail on integer input.
    n = np.array(n, dtype=np.float64).flatten()
    assert(n.size == 3)

    theta = np.linalg.norm(n)
    if theta:
        n /= theta
        # Skew-symmetric cross-product matrix of the unit rotation axis.
        K = np.array([[0, -n[2], n[1]], [n[2], 0, -n[0]], [-n[1], n[0], 0]])
        return np.identity(3) + np.sin(theta) * K + (1 - np.cos(theta)) * K @ K
    else:
        # Zero-length rotation vector: no rotation.
        return np.identity(3)

def get_bbox(p0, p1):
    """
    Build the vertices and edge list of an axis-aligned box.

    Input:
    * p0, p1
    (3)
    Two opposite corners of the bounding box, expressed in the body frame.

    Output:
    * v
    (3, 8)
    The eight box vertices in the body frame; row 0 holds x, row 1 y and
    row 2 z. Each column takes every coordinate from either p0 or p1.
    * e
    (2, 14)
    Vertex-index pairs, one edge per column. The first 2 edges indicate the
    `front` side of the box.
    """
    corners = (p0, p1)
    # For each of the 8 vertices: which corner (0 -> p0, 1 -> p1) supplies
    # the x, y and z coordinate respectively.
    pick_x = (0, 0, 0, 0, 1, 1, 1, 1)
    pick_y = (0, 0, 1, 1, 0, 0, 1, 1)
    pick_z = (0, 1, 0, 1, 0, 1, 0, 1)

    v = np.array([
        [corners[which][0] for which in pick_x],
        [corners[which][1] for which in pick_y],
        [corners[which][2] for which in pick_z]])

    edge_start = [2, 3, 0, 0, 3, 3, 0, 1, 2, 3, 4, 4, 7, 7]
    edge_end = [7, 6, 1, 2, 1, 2, 4, 5, 6, 7, 5, 6, 5, 6]
    e = np.array([edge_start, edge_end], dtype=np.uint8)

    return v, e

#Class names ordered by class id; the plotting demo looks a name up by
#indexing this tuple with the integer class id stored in each bounding box.
classes = tuple([
    'Unknown', 'Compacts', 'Sedans', 'SUVs',
    'Coupes', 'Muscle', 'SportsClassics', 'Sports',
    'Super', 'Motorcycles', 'OffRoad', 'Industrial',
    'Utility', 'Vans', 'Cycles', 'Boats',
    'Helicopters', 'Planes', 'Service', 'Emergency',
    'Military', 'Commercial', 'Trains',
])

In [None]:
# Load the class metadata table and preview its first five rows.
df = pd.read_csv("classes.csv")
df.head(n=5)

In [None]:
# Load the binary side-car files that accompany one camera image.
# NOTE(review): `snapshot` is not defined in any cell visible in this
# notebook; it is expected to be the path of a '*_image.jpg' file. Define it
# (and `img`, which the plotting cell below uses) before running this cell
# on a fresh kernel.

#Getting point cloud for each image
xyz = np.fromfile(snapshot.replace('_image.jpg', '_cloud.bin'), dtype=np.float32)
# Flat float32 buffer -> 3 rows; the plotting cell treats them as x, y, z.
xyz = xyz.reshape([3, -1])

#Getting the camera projection matrix 'M' for each image
proj = np.fromfile(snapshot.replace('_image.jpg', '_proj.bin'), dtype=np.float32)
# In-place resize to the 3x4 projection matrix shape.
proj.resize([3, 4])

#Getting bounding box for each image
try:
    bbox = np.fromfile(snapshot.replace('_image.jpg', '_bbox.bin'), dtype=np.float32)
except FileNotFoundError:
    # No annotation file: continue with zero boxes instead of failing.
    print('[*] bbox not found.')
    bbox = np.array([], dtype=np.float32)

# One row of 11 floats per box. The plotting cell reads them as
# [0:3] rotation vector, [3:6] translation, [6:9] size, [9] class id,
# [10] ignore-in-evaluation flag.
bbox = bbox.reshape([-1, 11])

In [None]:
# Project the point cloud into the image plane (homogeneous coordinates) and
# normalise by the third row to obtain pixel coordinates.
uv = proj @ np.vstack([xyz, np.ones_like(xyz[0, :])])
uv = uv / uv[2, :]

# Distance of every point from the origin, used to colour the scatter plots.
dist = np.linalg.norm(xyz, axis=0)

# Figure 1: camera image with the projected point cloud drawn on top.
# NOTE(review): `img` is not defined in any visible cell; it must hold the
# image that belongs to the loaded point cloud.
fig1 = plt.figure(1, figsize=(10, 7))
ax1 = fig1.add_subplot(1, 1, 1)
ax1.imshow(img)
ax1.scatter(uv[0, :], uv[1, :], c=dist, marker='+', s=1)
ax1.axis('scaled')
fig1.tight_layout()

# Figure 2: the point cloud in 3D. The axes are created through add_subplot
# because constructing Axes3D(fig) directly no longer attaches the axes to
# the figure on current Matplotlib releases (auto-adding was deprecated in
# Matplotlib 3.4), which leaves the 3D plot empty.
fig2 = plt.figure(2, figsize=(8, 8))
ax2 = fig2.add_subplot(1, 1, 1, projection='3d')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_zlabel('z')

# Plot only every `step`-th point to keep the 3D scatter responsive.
step = 5
ax2.scatter(
    xyz[0, ::step], xyz[1, ::step], xyz[2, ::step],
    c=dist[::step], marker='.', s=1
)

# Draw every bounding box in both figures, cycling through the default colours.
colors = ['C{:d}'.format(i) for i in range(10)]
for k, b in enumerate(bbox):
    R = rot(b[0:3])
    t = b[3:6]

    # Box centred on the origin, then rotated and translated into place.
    sz = b[6:9]
    vert_3D, edges = get_bbox(-sz / 2, sz / 2)
    vert_3D = R @ vert_3D + t[:, np.newaxis]

    vert_2D = proj @ np.vstack([vert_3D, np.ones(vert_3D.shape[1])])
    vert_2D = vert_2D / vert_2D[2, :]

    clr = colors[np.mod(k, len(colors))]
    for e in edges.T:
        ax1.plot(vert_2D[0, e], vert_2D[1, e], color=clr)
        ax2.plot(vert_3D[0, e], vert_3D[1, e], vert_3D[2, e], color=clr)

    # Label the box with its class name; red marks boxes flagged as ignored
    # in evaluation.
    c = classes[int(b[9])]
    ignore_in_eval = bool(b[10])
    if ignore_in_eval:
        ax2.text(t[0], t[1], t[2], c, color='r')
    else:
        ax2.text(t[0], t[1], t[2], c)

ax2.auto_scale_xyz([-40, 40], [-40, 40], [0, 80])
ax2.view_init(elev=-30, azim=-90)

# Unit axes at the origin; each identity row doubles as the RGB colour, so
# x is red, y is green and z is blue.
for e in np.identity(3):
    ax2.plot([0, e[0]], [0, e[1]], [0, e[2]], color=e)

plt.show()

In [None]:
#Plotting a random selection of images in a grid
def plot_multiple_images(nrows, ncols, image_path_files):
    """
    Show randomly chosen images in an nrows x ncols grid.

    Parameters
    ----------
    nrows, ncols : int
        Grid dimensions; at most nrows * ncols images are drawn.
    image_path_files : sequence of str
        Paths of candidate image files.

    Every path (including the first one) is eligible. When fewer than
    nrows * ncols paths are supplied, all of them are shown and the remaining
    grid cells stay empty.
    """
    candidates = list(image_path_files)
    # Never request more images than the grid can hold or than exist.
    n_images = min(nrows * ncols, len(candidates))
    car_pix = random.sample(candidates, n_images)

    fig = plt.figure(figsize=(14, 10))
    for i, img_path in enumerate(tqdm_notebook(car_pix)):
        # Subplot indices start at 1
        sp = fig.add_subplot(nrows, ncols, i + 1)
        sp.axis('off') # Don't show axes (or gridlines)
        sp.imshow(plt.imread(img_path))

    plt.show()

# NOTE(review): `files` is not defined in any visible cell; it must be a list
# of image paths.
plot_multiple_images(4, 5, files)

In [None]:
# Unpack the full preprocessed archive into the current working directory.
zip_file = "Preprocessed_Task1.zip"
preprocessed_image_zip = zipfile.ZipFile(zip_file, 'r')
try:
    preprocessed_image_zip.extractall()
finally:
    # Always release the archive handle, even if extraction fails.
    preprocessed_image_zip.close()
print("Unzipped files!!")

In [None]:
# One sub-directory per training class below the extracted dataset root.
base_dir2 = os.path.join(os.getcwd(), "Preprocessed_Task1")
train_unknown_dir = os.path.join(base_dir2, "Unknown_and_Others")
train_car_dir = os.path.join(base_dir2, "Cars")
train_2wheeler_dir = os.path.join(base_dir2, "2_Wheeler")
train_offroad_dir = os.path.join(base_dir2, "Offroad")
train_industrial_dir = os.path.join(base_dir2, "Industrial")
train_utility_dir = os.path.join(base_dir2, "Utility")
train_service_dir = os.path.join(base_dir2, "Service")
train_commercial_dir = os.path.join(base_dir2, "Commercial")

#Printing the number of entries per class directory to crosscheck the extraction
class_dir_pairs = (
    ("Unknown_and_Others", train_unknown_dir),
    ("Cars", train_car_dir),
    ("2_Wheeler", train_2wheeler_dir),
    ("Offroad", train_offroad_dir),
    ("Industrial", train_industrial_dir),
    ("Utility", train_utility_dir),
    ("Service", train_service_dir),
    ("Commercial", train_commercial_dir),
)
for class_name, class_dir in class_dir_pairs:
    print("The number of Class: {} = {}".format(class_name, len(os.listdir(class_dir))))

In [None]:
from glob import glob
# Every .jpg exactly one directory level below the dataset root.
training_image_paths = glob("Preprocessed_Task1/*/*.jpg")
print("Total number of training images = {}".format(len(training_image_paths)))

#### Reference functions and checking GPU instance

In [None]:
#Creating mapping dictionaries for output label class and class names
# id2label_dict maps every class_id in classes.csv to its integer label.
# The two columns are paired row by row in a single pass; this avoids calling
# int() on a one-element Series (deprecated in recent pandas) and avoids
# shadowing the builtin `id`. If a class_id occurs more than once, the last
# row wins.
id2label_dict = {int(class_id): int(label)
                 for class_id, label in zip(df.class_id, df.label)}
# Human-readable names of the three output labels.
label2classes_dict = {0: "Unknown_and_Others",
                      1: "Cars",
                      2: "Other_modes_of_transport"}

#Reference functions to copy images into their respective directories
def img2classid(img_path):
    """
    Return the label of the single object annotated for an image.

    Parameters
    ----------
    img_path : str
        Path of a '*_image.jpg' file; the annotation is read from the
        '*_bbox.bin' file next to it.

    Returns
    -------
    The `id2label_dict` entry for the class id stored in the bounding box.

    Raises
    ------
    ValueError
        If the annotation file does not contain exactly one bounding box.
    FileNotFoundError
        If the annotation file does not exist.
    """
    bbox = np.fromfile(img_path.replace('_image.jpg', '_bbox.bin'), dtype=np.float32)
    # One row of 11 floats per box; the class id is the second-to-last value.
    bbox = bbox.reshape([-1, 11])
    if bbox.shape[0] != 1:
        raise ValueError(
            "expected exactly one bounding box for {!r}, found {}".format(
                img_path, bbox.shape[0]))
    # Index a single element so int() receives a scalar, not a 1-D array.
    class_id = int(bbox[0, -2])
    return id2label_dict[class_id]

In [None]:
#Checking GPU available
# `!nvidia-smi` is an IPython shell escape, so this cell only runs inside
# Jupyter/Colab; the command output is captured as a list of lines.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The command output containing the word 'failed' is taken to mean that no
# GPU is attached.
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)

# Report the total system memory in gigabytes (1e9 bytes).
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Runtimes with less than 20 GB are reported as not high-RAM.
if ram_gb < 20:
    print('Not using a high-RAM runtime')
else:
    print('You are using a high-RAM runtime!')

#### Loading images using ImageDataGenerator class

In [None]:
#Creating ImageDataGenerators for training, validation sets
base_dir2 = os.path.join(os.getcwd(), "Preprocessed_Task1")
from tensorflow.keras.preprocessing.image import ImageDataGenerator
res_size = 224
classes_ls = ["Unknown_and_Others", "Cars", "2_Wheeler", "Offroad", "Industrial", "Utility", "Service", "Commercial"]

# Training images: random vertical shift and horizontal flip, scaled to [0, 1].
train_datagen = ImageDataGenerator(height_shift_range=0.15, fill_mode='nearest',
                                   horizontal_flip=True, rescale=1.0/255., validation_split=0.15)

# Validation images are only rescaled. Random augmentation would make
# val_loss / val_accuracy noisy and unrepresentative of the un-augmented test
# images. validation_split must equal the training value so that both
# generators partition the files of each class in the same way.
val_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

train_generator = train_datagen.flow_from_directory(
                  base_dir2, target_size=(res_size, res_size), color_mode='rgb',
                  classes = classes_ls,
                  class_mode='categorical', batch_size=32, shuffle=True,
                  subset="training", interpolation='bilinear')

# shuffle=False keeps the batch order aligned with val_generator.classes.
val_generator = val_datagen.flow_from_directory(
                base_dir2, target_size=(res_size, res_size), color_mode='rgb',
                classes = classes_ls,
                class_mode='categorical', batch_size=32, shuffle=False,
                subset="validation", interpolation='bilinear')

Found 6441 images belonging to 8 classes.
Found 1132 images belonging to 8 classes.


#### Fine tuning a ResNetV2 50 model

In [None]:
##Resume from a ResNetV2-50 checkpoint trained on Google Colab Pro with the
##first 130 layers frozen, and print its architecture
from tensorflow.keras.models import load_model
checkpoint_path = "training_ResNetV2-005.h5"
resnetv2_model = load_model(filepath=checkpoint_path)
resnetv2_model.summary()

In [None]:
#Compiling the model
from tensorflow.keras.optimizers import Adam
opt = Adam(learning_rate = 2e-3)
loss = tf.keras.losses.CategoricalCrossentropy()

resnetv2_model.compile(optimizer = opt,
                       loss = loss,
                       metrics = ["accuracy"])

# Multiply the learning rate by 0.3 when val_accuracy has not improved by
# more than min_delta for 4 consecutive epochs.
# NOTE(review): min_delta=0.1 requires a 10-percentage-point jump in
# val_accuracy to count as an improvement; the later training cells use
# 0.02 / 0.01. Confirm this value is intended.
lor = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.3, patience=4, verbose=1,
    mode='auto', min_delta=0.1, min_lr=0)

# Keep a full model file whenever val_accuracy improves. Metrics in the file
# name use 4 decimals ('.4f'), the same pattern as the EfficientNetV2
# checkpoints written later in this notebook.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "ResNetV2_training/training-{epoch:03d}-{val_loss:.4f}-{val_accuracy:.4f}.h5",
    monitor='val_accuracy', verbose=1, save_best_only=True,
    save_weights_only = False, save_freq='epoch')

with tf.device('/gpu:0'):
    history2 = resnetv2_model.fit(
               train_generator,
               validation_data = val_generator,
               epochs = 55,
               verbose = 1, shuffle=True,
               callbacks = [lor, checkpoint])

In [None]:
# Per-epoch metrics recorded by the last fit() call.
acc = history2.history['accuracy']
val_acc = history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']

epochs = range(len(acc))

# Accuracy curves. No extra plt.figure() call before plt.show(): it only
# produced an additional empty figure.
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)

plt.show()

In [None]:
# Per-epoch metrics recorded by the last fit() call.
acc = history2.history['accuracy']
val_acc = history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']

epochs = range(len(acc))

# Loss curves. This cell used to be an exact copy of the accuracy plot while
# `loss` and `val_loss` were extracted but never shown.
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc=0)

plt.show()

#### Fine-tuning ResNetV2-50 with more data augmentation

In [None]:
##Reload the ResNetV2-50 checkpoint (trained on Google Colab Pro with the
##first 130 layers frozen) as the starting point for further fine-tuning
from tensorflow.keras.models import load_model
checkpoint_path = "ResNetV2_training/training_ResNetV2-046.h5"
resnetv2_model = load_model(filepath=checkpoint_path)

In [None]:
#Mark layers 90..129 as trainable so that, given the first 130 layers were
#frozen before, only the first 90 layers stay frozen
unfreeze_start, unfreeze_stop = 90, 130
for unfrozen_layer in resnetv2_model.layers[unfreeze_start:unfreeze_stop]:
    unfrozen_layer.trainable = True

In [None]:
base_dir2 = os.path.join(os.getcwd(), "Preprocessed_Task1")
from tensorflow.keras.preprocessing.image import ImageDataGenerator
res_size = 224
classes_ls = ["Unknown_and_Others", "Cars", "2_Wheeler", "Offroad", "Industrial", "Utility", "Service", "Commercial"]

# Heavier augmentation for the second fine-tuning round (training images only).
# NOTE(review): brightness factors below 1.0 darken an image, so the range
# (0.3, 0.7) appears to darken every training image while the test images are
# only rescaled. Confirm this range is intended.
train_datagen = ImageDataGenerator(height_shift_range=0.15, fill_mode='nearest',
                                   horizontal_flip=True, rescale=1.0/255., validation_split=0.15,
                                   width_shift_range=0.1, brightness_range=(0.3, 0.7), zoom_range=0.1,
                                  rotation_range = 10, shear_range=0.1)

# Validation images are only rescaled. Random augmentation would make
# val_loss / val_accuracy noisy and unrepresentative of the un-augmented test
# images. validation_split must equal the training value so that both
# generators partition the files of each class in the same way.
val_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

train_generator = train_datagen.flow_from_directory(
                  base_dir2, target_size=(res_size, res_size), color_mode='rgb',
                  classes = classes_ls,
                  class_mode='categorical', batch_size=32, shuffle=True,
                  subset="training", interpolation='lanczos')

# shuffle=False keeps the batch order aligned with val_generator.classes.
val_generator = val_datagen.flow_from_directory(
                base_dir2, target_size=(res_size, res_size), color_mode='rgb',
                classes = classes_ls,
                class_mode='categorical', batch_size=32, shuffle=False,
                subset="validation", interpolation='lanczos')

Found 6441 images belonging to 8 classes.
Found 1132 images belonging to 8 classes.


In [None]:
#Re-compile the partially unfrozen ResNetV2 model and continue training
from tensorflow.keras.optimizers import Adam
opt = Adam(learning_rate=4e-3)
loss = tensorflow.keras.losses.CategoricalCrossentropy()

resnetv2_model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

# Learning-rate schedule driven by validation accuracy.
lor = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.25,
    patience=4,
    verbose=1,
    mode='auto',
    min_delta=0.02,
    min_lr=1e-12)

# Write a full model file for every epoch that improves validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "ResNetV2_training/training-{epoch:03d}.h5",
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch')

fit_options = dict(
    validation_data=val_generator,
    epochs=25,
    verbose=1,
    shuffle=True,
    callbacks=[lor, checkpoint])

with tf.device('/gpu:0'):
    history2 = resnetv2_model.fit(train_generator, **fit_options)

#### Training an EfficientNetv2 B0 on preprocessed data - Tensorflow Hub

In [None]:
# Input resolution and pretrained TensorFlow Hub backbone (weights trainable).
effb0_size = 224
do_fine_tuning = True
base_model = hub.KerasLayer("https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2",
                            trainable = do_fine_tuning)

# Fully connected head: three ReLU layers with He-normal kernels and zero
# biases, created in the order they are stacked.
hidden_units = (512, 256, 64)
head_layers = [
    tf.keras.layers.Dense(units, activation='relu', use_bias=True,
                          kernel_initializer=tf.keras.initializers.HeNormal(),
                          bias_initializer='zeros')
    for units in hidden_units]

effv2b0_model = tf.keras.Sequential(
    [tf.keras.layers.InputLayer(input_shape=(effb0_size, effb0_size, 3)),
     base_model]
    + head_layers
    # Final softmax layer: one probability per class (8 classes).
    + [tf.keras.layers.Dense(8, activation="softmax")])

effv2b0_model.build((None, effb0_size, effb0_size, 3))

In [None]:
#Compile and train the EfficientNetV2-B0 model
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=5e-3)
loss = tensorflow.keras.losses.CategoricalCrossentropy()

effv2b0_model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

# Learning-rate schedule driven by validation accuracy.
lor = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=4,
    verbose=1,
    mode='auto',
    min_delta=0.01,
    min_lr=1e-13)

# Write a full model file for every epoch that improves validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "Efficientv2B0_training-{epoch:03d}.h5",
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch')

fit_options = dict(
    validation_data=val_generator,
    epochs=60,
    verbose=1,
    shuffle=True,
    callbacks=[lor, checkpoint])

history2 = effv2b0_model.fit(train_generator, **fit_options)

In [None]:
# Per-epoch metrics recorded by the last fit() call.
acc = history2.history['accuracy']
val_acc = history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']

epochs = range(len(acc))

# Accuracy and loss side by side. The earlier version called plt.figure()
# right before plt.show(), which emitted an extra empty figure, and never
# plotted the extracted loss values.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

acc_ax.plot(epochs, acc, 'r', label='Training accuracy')
acc_ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
acc_ax.set_title('Training and validation accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc=0)

loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc=0)

plt.show()

### Section III: Evaluating model performance

#### ResNetV250 model

In [None]:
# Load the ResNetV2-50 checkpoint that is evaluated below.
from tensorflow.keras.models import load_model
checkpoint_path = "ResNetV2_training/training_ResNetV2-046.h5"
test_resnetv250_model = load_model(filepath=checkpoint_path)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

classes_ls = ["Unknown_and_Others", "Cars", "2_Wheeler", "Offroad", "Industrial", "Utility", "Service", "Commercial"]

# Evaluate on un-augmented validation images: only rescaling is applied, so
# the confusion matrix does not depend on random shifts, flips or brightness
# changes. validation_split must equal the value used by the training
# generators (0.15) so that the same files form the validation subset.
eval_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

# shuffle=False keeps the prediction order aligned with val_generator.classes.
val_generator = eval_datagen.flow_from_directory(
                base_dir2, target_size=(res_size, res_size), color_mode='rgb',
                classes = classes_ls,
                class_mode='categorical', batch_size=32, shuffle=False,
                subset="validation", interpolation='bilinear')

predictions = test_resnetv250_model.predict(val_generator, verbose=1, use_multiprocessing=True)
y_true = val_generator.classes

# Rows are true classes, columns are predicted classes, both in classes_ls
# order. The matrix is the last expression so the notebook displays it.
confusion = tf.math.confusion_matrix(
    y_true, np.argmax(predictions, axis=1), num_classes=len(classes_ls))
confusion

Found 1132 images belonging to 8 classes.


<tf.Tensor: shape=(8, 8), dtype=int32, numpy=
array([[  3,   0,   0,   0,   0,   0,   0,   2],
       [  5, 628,   0,  24,   8,  33,   1,  21],
       [  0,   7,  13,   8,   0,   6,   0,   0],
       [  0,  46,   0,  22,   0,  12,   1,   0],
       [  0,   0,   0,   0,  20,   0,   0,   0],
       [  1,  84,   5,  13,   0,  87,   1,   3],
       [  0,   4,   1,   0,   2,   5,  30,   1],
       [  0,  15,   0,   0,   1,   2,   0,  17]], dtype=int32)>

Inferences --
* Cars being confused with Utility class vehicles (Utility & vans)
* Cars being confused with OffRoad class
* Cars being confused with Commercial vehicles

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
checkpoint_path = "EfficientB4_training/training-017-0.9035-0.7473.h5"

with tf.device('/gpu:0'):
    test_effb4_model = load_model(checkpoint_path, custom_objects={'KerasLayer':hub.KerasLayer})
    effb4_size = 384

    # Evaluate on un-augmented validation images: only rescaling is applied,
    # so the confusion matrix does not depend on random augmentation.
    # validation_split must equal the value used by the training generators
    # (0.15) so that the same files form the validation subset.
    eval_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

    # shuffle=False keeps the prediction order aligned with
    # val_generator.classes.
    val_generator = eval_datagen.flow_from_directory(
                      base_dir2, target_size=(effb4_size, effb4_size), color_mode='rgb',
                      classes = classes_ls,
                      class_mode='categorical', batch_size=16, shuffle=False,
                      subset="validation", interpolation='lanczos')

    predictions = test_effb4_model.predict(val_generator, verbose=1, use_multiprocessing=True)
    y_true = val_generator.classes

# Rows are true classes, columns are predicted classes.
print(tf.math.confusion_matrix(
        y_true, np.argmax(predictions, axis=1), num_classes=8))

Found 1132 images belonging to 8 classes.


2021-12-11 03:14:35.978600: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200




In [None]:
#Loading test dataset images
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# np.load already returns an ndarray for a .npy file; wrapping it in
# np.array() made a second full copy of the image array in memory.
test_X384 = np.load("effv2s_test_x.npy")
# Test images are only rescaled to [0, 1]; no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255.)

# shuffle=False keeps predictions in the same order as the stored images.
test_generator = test_datagen.flow(test_X384, batch_size = 16, shuffle=False)

In [None]:
##Creating submission csv based on EffNetB4 training
#Transforming labels to appropriate classes for csv submission
def convert_label(label):
    """
    Collapse an 8-class model output index into a submission label.

    Indices 3, 4 and 5 become 2; indices 6 and 7 become 0; every other value
    is returned unchanged.
    """
    if label in (3, 4, 5):
        return 2
    if label in (6, 7):
        return 0
    return label
        
from tensorflow.keras.models import load_model
test_files = np.load("test_files.npy")

def test_model_performance(model_path, csv_file_name, test_array):
    """
    Predict labels for the test images and write the submission csv.

    Parameters
    ----------
    model_path : str
        Path of the saved Keras model (may contain TensorFlow Hub layers).
    csv_file_name : str
        Output path of the csv file with columns 'guid/image' and 'label'.
    test_array : array-like
        Test images with raw pixel values; they are rescaled by 1/255 here.
        Their order must match the module-level `test_files`.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of test files.

    The images are always taken from `test_array`. The previous version
    predicted from the global `test_generator`, which a later cell rebinds to
    a different, augmented array.
    """
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    with tf.device('/gpu:0'):
        test_model = load_model(model_path, custom_objects={'KerasLayer':hub.KerasLayer})

        # Rescale only and keep the order, so row i of the predictions
        # belongs to test_files[i].
        eval_generator = ImageDataGenerator(rescale=1.0/255.).flow(
            np.asarray(test_array), batch_size=16, shuffle=False)

        #Obtain predictions from the test data
        predictions = test_model.predict(eval_generator, verbose=1)

    print("Dimensions of the predictions array: {}".format(predictions.shape))
    if predictions.shape[0] != len(test_files):
        raise ValueError(
            "got {} predictions for {} test files".format(
                predictions.shape[0], len(test_files)))
    predictions_ls = [convert_label(label) for label in np.argmax(predictions, axis=1).tolist()]

    #Creating a dataframe and saving it as output
    # Drops the first 5 and last 10 characters of every file name
    # (presumably a 'test/' prefix and the '_image.jpg' suffix - verify
    # against the contents of test_files.npy).
    test_file = [fname[5:-10] for fname in test_files]
    data = {'guid/image': test_file, 'label': predictions_ls}
    output_df = pd.DataFrame(data)
    output_df.to_csv(csv_file_name, index=False)
    print("File_downloaded as .csv!!")

test_model_performance(model_path = "EfficientB4_training/training-017-0.9035-0.7473.h5",
                       csv_file_name = "EffNetB4_preprocessed_Team11.csv",
                       test_array = test_X384)

2021-12-11 03:51:02.648274: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200


Dimensions of the predictions array: (2631, 8)
File_downloaded as .csv!!


#### Performing Error analysis on test set

In [None]:
# np.load already returns an ndarray for a .npy file; wrapping it in
# np.array() made a second full copy of the image array in memory.
test_X = np.load("res_test_x.npy")
# NOTE(review): this generator applies random shifts and flips to the test
# images, so predictions made through it change from run to run. Confirm the
# augmentation is intended for the error analysis.
test_datagen = ImageDataGenerator(rescale=1.0/255., width_shift_range = 0.2,
                                  height_shift_range = 0.2, horizontal_flip = True)

# NOTE(review): this rebinds `test_generator`, which the submission cell
# above also created (with a different image array and batch size).
test_generator = test_datagen.flow(test_X, batch_size = 32, shuffle=False)

In [None]:
# Class probabilities of the ResNetV2-50 model for every test image.
test_predictions = test_resnetv250_model.predict(
    test_generator,
    verbose=1,
    use_multiprocessing=True)



In [None]:
def test_multiple_images(nrows, ncols):
    """
    Show the next test batch in a grid, titled with the model's predictions.

    Parameters
    ----------
    nrows, ncols : int
        Grid dimensions. At most nrows * ncols images are drawn; fewer when
        the batch returned by `test_generator` is smaller than the grid.

    Uses the module-level `test_generator`, `test_resnetv250_model` and
    `classes_ls`.
    """
    # Take as many images as the grid holds. A fixed count of 16 raised a
    # subplot index error for grids with fewer than 16 cells.
    n_images = nrows * ncols
    img_data = next(test_generator)[:n_images]

    # One batched forward pass instead of one predict() call per image.
    pred_ids = np.argmax(test_resnetv250_model.predict(img_data), axis=1)

    fig = plt.figure(figsize=(14, 14))
    for idx, (img, pred) in enumerate(zip(img_data, pred_ids)):
        # Subplot indices start at 1
        sp = fig.add_subplot(nrows, ncols, idx + 1)
        sp.axis('off') # Don't show axes (or gridlines)
        sp.imshow(img)
        sp.set_title('Model prediction: {} - {}'.format(pred, classes_ls[pred]))

    plt.show()

test_multiple_images(4, 4)

#### Training a fully tuned EfficientNetV2-S model 

In [None]:
base_dir2 = os.path.join(os.getcwd(), "Preprocessed_Task1")
from tensorflow.keras.preprocessing.image import ImageDataGenerator
effb4_size = 384
classes_ls = ["Unknown_and_Others", "Cars", "2_Wheeler", "Offroad", "Industrial", "Utility", "Service", "Commercial"]

# Augmentation for the training images only.
# NOTE(review): brightness factors below 1.0 darken an image, so the range
# (0.3, 0.7) appears to darken every training image while the test images are
# only rescaled. Confirm this range is intended.
train_datagen = ImageDataGenerator(height_shift_range=0.15, fill_mode='nearest',
                                   horizontal_flip=True, rescale=1.0/255., validation_split=0.15,
                                   width_shift_range=0.15, brightness_range=(0.3, 0.7), zoom_range=0.1,
                                   rotation_range = 10, shear_range=0.1)

# Validation images are only rescaled. Random augmentation would make
# val_loss / val_accuracy noisy and unrepresentative of the un-augmented test
# images. validation_split must equal the training value so that both
# generators partition the files of each class in the same way.
val_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

train_generator = train_datagen.flow_from_directory(
                  base_dir2, target_size=(effb4_size, effb4_size), color_mode='rgb',
                  classes = classes_ls,
                  class_mode='categorical', batch_size=16, shuffle=True,
                  subset="training", interpolation='lanczos')

# shuffle=False keeps the batch order aligned with val_generator.classes.
val_generator = val_datagen.flow_from_directory(
                base_dir2, target_size=(effb4_size, effb4_size), color_mode='rgb',
                classes = classes_ls,
                class_mode='categorical', batch_size=16, shuffle=False,
                subset="validation", interpolation='lanczos')

Found 6441 images belonging to 8 classes.
Found 1132 images belonging to 8 classes.


In [None]:
#Building the EfficientNetV2-S model on top of the TensorFlow Hub backbone
effv2s_size = 384
do_fine_tuning = True
base_model = hub.KerasLayer("https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2",
                            trainable = do_fine_tuning)

# Fully connected head: four ReLU layers with He-normal kernels and zero
# biases, created in the order they are stacked.
hidden_units = (512, 256, 128, 64)
head_layers = [
    tf.keras.layers.Dense(units, activation='relu', use_bias=True,
                          kernel_initializer=tf.keras.initializers.HeNormal(),
                          bias_initializer='zeros')
    for units in hidden_units]

effv2s_model = tf.keras.Sequential(
    [tf.keras.layers.InputLayer(input_shape=(effv2s_size, effv2s_size, 3)),
     base_model]
    + head_layers
    # Final softmax layer: one probability per class (8 classes).
    + [tf.keras.layers.Dense(8, activation="softmax")])

effv2s_model.build((None, effv2s_size, effv2s_size, 3))

In [None]:
# Resume fine-tuning from a saved EfficientNetV2-S checkpoint.
from tensorflow.keras.models import load_model
checkpoint_path = "EfficientV2s_training/training-017-0.9035-0.7473.h5"
effb4_iter2_model = load_model(
    checkpoint_path,
    custom_objects={'KerasLayer': hub.KerasLayer})

#Compiling the model
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=1e-5)
loss = tensorflow.keras.losses.CategoricalCrossentropy()

with tf.device('/gpu:0'):
    effb4_iter2_model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

    # Learning-rate schedule driven by validation accuracy.
    lor = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.25,
        patience=4,
        verbose=1,
        mode='auto',
        min_delta=0.01,
        min_lr=1e-13)

    # Write a full model file for every epoch that improves validation
    # accuracy; the file name records epoch, val_loss and val_accuracy.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        "EfficientV2s_training/training-{epoch:03d}-{val_loss:.4f}-{val_accuracy:.4f}.h5",
        monitor='val_accuracy',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        save_freq='epoch')

    history3 = effb4_iter2_model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=35,
        verbose=1,
        shuffle=True,
        callbacks=[lor, checkpoint])

#### Model EffNetV2s evaluation

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
checkpoint_path = "EfficientV2s_training/training-007-0.9817-0.7482.h5"

with tf.device('/gpu:0'):
    test_effb4_model = load_model(checkpoint_path, custom_objects={'KerasLayer':hub.KerasLayer})
    effb4_size = 384

    # Evaluate on un-augmented validation images: only rescaling is applied,
    # so the confusion matrix does not depend on random augmentation.
    # validation_split must equal the value used by the training generators
    # (0.15) so that the same files form the validation subset.
    eval_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

    # shuffle=False keeps the prediction order aligned with
    # val_generator.classes.
    val_generator = eval_datagen.flow_from_directory(
                      base_dir2, target_size=(effb4_size, effb4_size), color_mode='rgb',
                      classes = classes_ls,
                      class_mode='categorical', batch_size=16, shuffle=False,
                      subset="validation", interpolation='lanczos')

    predictions = test_effb4_model.predict(val_generator, verbose=1, use_multiprocessing=True)
    y_true = val_generator.classes

# Rows are true classes, columns are predicted classes.
print(tf.math.confusion_matrix(
        y_true, np.argmax(predictions, axis=1), num_classes=8))

Found 1132 images belonging to 8 classes.
tf.Tensor(
[[  0   0   0   0   0   0   0   5]
 [  0 659   1  21   0  21   0  18]
 [  0   5  15   8   0   6   0   0]
 [  0  32   1  30   0  18   0   0]
 [  0   0   0   0  20   0   0   0]
 [  0  89   2  27   0  75   0   1]
 [  0   5   0   0   0   4  34   0]
 [  0  20   0   3   0   2   0  10]], shape=(8, 8), dtype=int32)


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
checkpoint_path = "EfficientV2s_training/training-017-0.9035-0.7473.h5"

with tf.device('/gpu:0'):
    test_effb4_model = load_model(checkpoint_path, custom_objects={'KerasLayer':hub.KerasLayer})
    effb4_size = 384

    # Evaluate on un-augmented validation images: only rescaling is applied,
    # so the confusion matrix does not depend on random augmentation.
    # validation_split must equal the value used by the training generators
    # (0.15) so that the same files form the validation subset.
    eval_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

    # shuffle=False keeps the prediction order aligned with
    # val_generator.classes.
    val_generator = eval_datagen.flow_from_directory(
                      base_dir2, target_size=(effb4_size, effb4_size), color_mode='rgb',
                      classes = classes_ls,
                      class_mode='categorical', batch_size=16, shuffle=False,
                      subset="validation", interpolation='lanczos')

    predictions = test_effb4_model.predict(val_generator, verbose=1, use_multiprocessing=True)
    y_true = val_generator.classes

# Rows are true classes, columns are predicted classes.
print(tf.math.confusion_matrix(
        y_true, np.argmax(predictions, axis=1), num_classes=8))

Found 1132 images belonging to 8 classes.
tf.Tensor(
[[  0   0   0   0   0   0   0   5]
 [  0 653   2  26   2  21   0  16]
 [  0   6  19   3   0   6   0   0]
 [  0  38   0  27   0  16   0   0]
 [  0   0   0   0  19   0   0   1]
 [  0  82   3  21   0  88   0   0]
 [  0   2   0   1   0   6  34   0]
 [  0  19   0   2   0   4   0  10]], shape=(8, 8), dtype=int32)


#### Training a fully tuned EfficientNetV2-S model on subset data

In [None]:
base_dir2 = os.path.join(os.getcwd(), "Preprocessed_subset_Task1")
from tensorflow.keras.preprocessing.image import ImageDataGenerator
effv2s_size = 384
classes_ls = ["Offroad", "Cars", "Utility"]

# Augmentation for the training images only.
# NOTE(review): brightness factors below 1.0 darken an image, so the range
# (0.3, 0.7) appears to darken every training image. Confirm this range is
# intended.
train_datagen = ImageDataGenerator(height_shift_range=0.15, fill_mode='nearest',
                                   horizontal_flip=True, rescale=1.0/255., validation_split=0.15,
                                   width_shift_range=0.15, brightness_range=(0.3, 0.7), zoom_range=0.1,
                                   rotation_range = 10, shear_range=0.1)

# Validation images are only rescaled. Random augmentation would make
# val_loss / val_accuracy noisy and unrepresentative of un-augmented images.
# validation_split must equal the training value so that both generators
# partition the files of each class in the same way.
val_datagen = ImageDataGenerator(rescale=1.0/255., validation_split=0.15)

train_generator = train_datagen.flow_from_directory(
                  base_dir2, target_size=(effv2s_size, effv2s_size), color_mode='rgb',
                  classes = classes_ls,
                  class_mode='categorical', batch_size=8, shuffle=True,
                  subset="training", interpolation='lanczos')

# shuffle=False keeps the batch order aligned with val_generator.classes.
val_generator = val_datagen.flow_from_directory(
                base_dir2, target_size=(effv2s_size, effv2s_size), color_mode='rgb',
                classes = classes_ls,
                class_mode='categorical', batch_size=8, shuffle=False,
                subset="validation", interpolation='lanczos')

Found 5649 images belonging to 3 classes.
Found 995 images belonging to 3 classes.


In [None]:
#Building the 3-class EfficientNetV2-S model on top of the TensorFlow Hub backbone
effv2s_size = 384
do_fine_tuning = True
base_model = hub.KerasLayer("https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2",
                            trainable = do_fine_tuning)

# Fully connected head: ReLU layers with He-normal kernels and zero biases.
# The first two are each followed by 15% dropout, the third is not.
head_spec = ((512, 0.15), (256, 0.15), (128, None))
head_layers = []
for units, dropout_rate in head_spec:
    head_layers.append(
        tf.keras.layers.Dense(units, activation='relu', use_bias=True,
                              kernel_initializer=tf.keras.initializers.HeNormal(),
                              bias_initializer='zeros'))
    if dropout_rate is not None:
        head_layers.append(tf.keras.layers.Dropout(rate=dropout_rate))

effv2s_model = tf.keras.Sequential(
    [tf.keras.layers.InputLayer(input_shape=(effv2s_size, effv2s_size, 3)),
     base_model]
    + head_layers
    # Final softmax layer: one probability per class (3 classes).
    + [tf.keras.layers.Dense(3, activation="softmax")])

effv2s_model.build((None, effv2s_size, effv2s_size, 3))

In [None]:
# Resume fine-tuning of the 3-class model from a saved checkpoint.
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
checkpoint_path = "EfficientV2s_subset_training/training-016-0.7580-0.7930.h5"

with tf.device('/gpu:0'):
    effv2s_model = load_model(
        checkpoint_path,
        custom_objects={'KerasLayer': hub.KerasLayer})

    #Compiling the model
    opt = Adam(learning_rate=1e-6)
    loss = tensorflow.keras.losses.CategoricalCrossentropy()

    effv2s_model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

    # Learning-rate schedule driven by validation accuracy.
    lor = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.25,
        patience=3,
        verbose=1,
        mode='auto',
        min_delta=0.01,
        min_lr=1e-13)

    # Write a full model file for every epoch that improves validation
    # accuracy; the file name records epoch, val_loss and val_accuracy.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        "EfficientV2s_subset_training/training-{epoch:03d}-{val_loss:.4f}-{val_accuracy:.4f}.h5",
        monitor='val_accuracy',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        save_freq='epoch')

    history3 = effv2s_model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=15,
        verbose=1,
        shuffle=True,
        callbacks=[lor, checkpoint])

In [None]:
from tensorflow.keras.models import load_model

#Confusion matrix on the validation split for the subset checkpoint whose file name
#ends in 0.7889 (the val_accuracy field of the checkpoint naming pattern)
checkpoint_path = "EfficientV2s_subset_training/training-001-0.7265-0.7889.h5"

with tf.device('/gpu:0'):
    test_effv2_model = load_model(
        checkpoint_path, custom_objects={'KerasLayer': hub.KerasLayer})
    effv2s_size = 384

    #shuffle=False keeps the prediction order aligned with val_generator.classes
    val_generator = train_datagen.flow_from_directory(
        base_dir2,
        target_size=(effv2s_size, effv2s_size),
        color_mode='rgb',
        classes=classes_ls,
        class_mode='categorical',
        batch_size=8,
        shuffle=False,
        subset="validation",
        interpolation='lanczos')

    predictions = test_effv2_model.predict(
        val_generator, verbose=1, use_multiprocessing=True)
    y_true = val_generator.classes

#Rows are the true class indices, columns the predicted class indices
predicted_classes = np.argmax(predictions, axis=1)
print(tf.math.confusion_matrix(y_true, predicted_classes, num_classes=3))

Found 995 images belonging to 3 classes.


2021-12-12 15:48:59.222530: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200


tf.Tensor(
[[ 21  39  21]
 [ 15 674  31]
 [ 10  97  87]], shape=(3, 3), dtype=int32)


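**Inferences: the 0.7889 checkpoint (confusion matrix above) compared with the 0.7930 checkpoint (confusion matrix below)**
* *Slightly better on the Utility class (87 vs. 85 correct)*
* *Better on the Cars class (674 vs. 660 correct)*
* *Worse on the Offroad class (21 vs. 32 correct)*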

In [None]:
from tensorflow.keras.models import load_model

#Confusion matrix on the validation split for the subset checkpoint whose file name
#ends in 0.7930 (the val_accuracy field of the checkpoint naming pattern)
checkpoint_path = "EfficientV2s_subset_training/training-016-0.7580-0.7930.h5"

with tf.device('/gpu:0'):
    test_effv2_model = load_model(
        checkpoint_path, custom_objects={'KerasLayer': hub.KerasLayer})
    effv2s_size = 384

    #shuffle=False keeps the prediction order aligned with val_generator.classes
    val_generator = train_datagen.flow_from_directory(
        base_dir2,
        target_size=(effv2s_size, effv2s_size),
        color_mode='rgb',
        classes=classes_ls,
        class_mode='categorical',
        batch_size=8,
        shuffle=False,
        subset="validation",
        interpolation='lanczos')

    predictions = test_effv2_model.predict(
        val_generator, verbose=1, use_multiprocessing=True)
    y_true = val_generator.classes

#Rows are the true class indices, columns the predicted class indices
predicted_classes = np.argmax(predictions, axis=1)
print(tf.math.confusion_matrix(y_true, predicted_classes, num_classes=3))

Found 995 images belonging to 3 classes.
tf.Tensor(
[[ 32  36  13]
 [ 27 660  33]
 [ 23  86  85]], shape=(3, 3), dtype=int32)


### Testing performance using Stacked Ensemble soft voting classifier

In [None]:
#Transforming labels to appropriate classes for csv submission
def convert_label(label):
    """
    Collapses a label predicted by the model into the final class label 0/1/2.

    Labels 3, 4 and 5 become 2, labels 6 and 7 become 0, and any other
    label is returned unchanged.
    """
    if label in (3, 4, 5):
        return 2
    if label in (6, 7):
        return 0
    return label

def sub_model_label(label):
    """
    Maps a label predicted by the stacked sub classifier to the final class
    label: 0 becomes 2, every other label is returned as itself.
    """
    return 2 if label == 0 else label

#Reference classes list -- Preprocessed train set into 8 sub directories
#The position of each name is the class index: custom_augmentation_testgen indexes this list
#with the arg-max of the full model's output, so the order of the entries matters
#NOTE(review): this rebinds classes_ls, which the generator cells of the 3-class subset model
#also pass as `classes` -- re-running those cells after this one would pick up these 8 names
classes_ls = ["Unknown_and_Others", "Cars", "2_Wheeler", "Offroad", "Industrial", "Utility", "Service", "Commercial"]

In [None]:
#Defining custom test generator with soft voting
def _build_tta_batch(img):
    """
    Returns a (6, H, W, C) array holding `img` followed by five randomly
    augmented copies of it, every entry rescaled by 1/255.
    """
    keras_image = tf.keras.preprocessing.image
    axes = dict(row_axis=0, col_axis=1, channel_axis=2)

    #Order: original, random shift 0.1*x & 0.1*y, random rotation 10 degrees,
    #       random brightness, random zoom +/-10% and random shear - intensity 8
    variants = [
        img,
        keras_image.random_shift(img, wrg=0.1, hrg=0.1, **axes),
        keras_image.random_rotation(img, rg=10, **axes),
        #NOTE(review): the whole brightness range lies below 1.0 -- confirm that it
        #matches the augmentation the models were trained with
        keras_image.random_brightness(img, brightness_range=(0.3, 0.7)),
        #random_zoom draws the zoom factors from [lower, upper]; (0.9, 1.1) is a +/-10% zoom.
        #The former (0.1, 0.1) pinned both factors at 0.1 instead of zooming by 10%.
        keras_image.random_zoom(img, zoom_range=(0.9, 1.1), **axes),
        keras_image.random_shear(img, intensity=8, **axes),
    ]
    return np.vstack([variant[np.newaxis, :] / 255 for variant in variants])


def _soft_vote(model, batch_img):
    """
    Soft voting: averages the model's predicted probabilities over all images
    of the batch and returns the index of the highest averaged probability.
    """
    #verbose=0 avoids one progress bar per test image on Keras versions that log by default
    probabilities = model.predict(batch_img, verbose=0)
    assert probabilities.shape[0] == batch_img.shape[0]
    return np.argmax(np.average(probabilities, axis=0))


def custom_augmentation_testgen(test_npy_file, 
                                saved_full_model_path,
                                saved_subset_model_path1,
                                saved_subset_model_path2,
                                test_files,
                                csv_file_name):
  
    """
    Predicts a label for every test image with test-time augmentation and
    soft voting, then writes the predictions to a csv file.

    Inputs:
    test_npy_file: Array of test images in form (batch, size, size, channel_size),
    e.g. the result of np.load("test_npy_file")

    saved_full_model_path: Full 8 classes classifier model path

    saved_subset_model_path1: Cars, Offroad and Utility class classifier model path
    Used when the full model predicts Cars or Utility

    saved_subset_model_path2: Second Cars, Offroad and Utility class classifier model path
    Used when the full model predicts Offroad

    test_files: Test files loaded using glob from the directory -- Used to create column in DataFrame.
    Must contain one path per image of test_npy_file.
    NOTE(review): assumed to be in the same order as the images of test_npy_file -- confirm

    csv_file_name: Output csv file name to store the submission 

    Output: Submission csv file to upload on Kaggle with the columns
    'guid/image', 'label' and 'flag' (1 when a subset model produced the label, else 0)

    Raises:
    ValueError: if test_files and test_npy_file differ in length
    """

    #Failing before any model is loaded instead of after the whole inference loop
    if len(test_files) != len(test_npy_file):
        raise ValueError(
            "test_files has {} entries but test_npy_file has {} images".format(
                len(test_files), len(test_npy_file)))

    #tf.keras.models.load_model is used so that the function does not depend on the
    #`load_model` import made inside one of the training cells
    custom_objects = {'KerasLayer': hub.KerasLayer}

    #Loading saved full model
    full_model = tf.keras.models.load_model(
        saved_full_model_path, custom_objects=custom_objects)

    #Loading saved subset model 1 -- Trained on Cars, Offroad and Utility class only 
    #Preferred when class is Cars or Utility
    subset_model_1 = tf.keras.models.load_model(
        saved_subset_model_path1, custom_objects=custom_objects)

    #Loading saved subset model 2 -- Trained on Cars, Offroad and Utility class only
    #Preferred when class is Offroad
    subset_model_2 = tf.keras.models.load_model(
        saved_subset_model_path2, custom_objects=custom_objects)

    #Which subset model refines which class predicted by the full model
    subset_model_for = {"Cars": subset_model_1,
                        "Utility": subset_model_1,
                        "Offroad": subset_model_2}

    #Aggregating predictions on each test image
    prediction_ls = []

    #Used for prioritizing between stacked Image Classifier and Object Detection model -- 0/1
    flag_label = []

    for img in tqdm_notebook(test_npy_file):
        batch_img = _build_tta_batch(img)

        #Checking whether the batch_img is valid or not
        assert batch_img.shape[0] == 6

        #Performing soft voting based on probabilities -- single classifier
        final_label = _soft_vote(full_model, batch_img)
        subset_model = subset_model_for.get(classes_ls[final_label])

        if subset_model is not None:
            #Stacking: the matching subset classifier decides between Cars, Offroad and Utility
            label = _soft_vote(subset_model, batch_img)
            prediction_ls.append(sub_model_label(label))
            flag_label.append(1)
        else:
            prediction_ls.append(convert_label(final_label))
            flag_label.append(0)

    #Creating a dataframe and saving it as output
    assert len(prediction_ls) == len(test_npy_file)

    #Based on submission regulations: fname[5:-10] drops the first 5 and the last 10 characters
    #of each path (the 'test/' prefix and '_image.jpg' suffix for paths from glob('test/*/*_image.jpg'))
    image_file = [fname[5:-10] for fname in test_files]
    data = {'guid/image': image_file, 'label': prediction_ls, 'flag':flag_label}
  
    #Creating the pandas DataFrame and saving it
    output_df = pd.DataFrame(data)
    output_df.to_csv(csv_file_name, index=False)
    print("File_downloaded as .csv!!")

In [None]:
#Running the function
test_npy_file = np.load("effv2s_test_x.npy") #Contains each image of dim (384,384,3) -- Input for EfficientV2s model
#Full 8 classes model
model_path = "EfficientV2s_training/training-017-0.9035-0.7473.h5"
#Subset model used when the full model predicts Cars or Utility
model_path2 = "EfficientV2s_subset_training/training-001-0.7265-0.7889.h5"
#Subset model used when the full model predicts Offroad
model_path3 = "EfficientV2s_subset_training/training-016-0.7580-0.7930.h5"
#NOTE(review): assumes the images in effv2s_test_x.npy were stored in this same sorted order -- confirm
test_files = sorted(glob('test/*/*_image.jpg'))
file_name = "Stacked_EfficientV2s_softVoting_Team11_flag.csv"

custom_augmentation_testgen(test_npy_file = test_npy_file,
                            saved_full_model_path = model_path,
                            saved_subset_model_path1 = model_path2,
                            saved_subset_model_path2 = model_path3,
                            test_files = test_files,
                            csv_file_name = file_name)