# **Setup**

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import os 
import cv2 
import PIL

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

import torch
from torch import nn 
from torch import optim as O
from torch.nn import Module
from torch.nn import functional as f
from torch.nn.modules.activation import Sigmoid
from torch.nn.modules.upsampling import UpsamplingBilinear2d
from torch.nn.modules.conv import Conv2d
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import Dataset 
from torch.utils.data import DataLoader


import torchvision
from torchvision import utils as Vision_utils
from torchvision import models
from torchvision.transforms import functional as TF

from glob import glob

import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm 

# **Loading the Data and Exploratory Data Analysis**

I load the dataset and carry out some general exploratory analysis, such as counting how many unique images, classes, and subjects it contains.

In [2]:
## Read the dataset's CSV index into a pandas DataFrame. A "file_path" column holding the image
## path of every entry is added to this dataframe in a later cell.
## NOTE(review): os.listdir() returns entries in arbitrary order, so arr[0] and list_dir[1] only
## select the intended dataset folder / CSV file if the listing order happens to match --
## TODO confirm, or select both entries by name.

arr = os.listdir("/kaggle/input")
data_dir = os.path.join("/kaggle/input", arr[0])
list_dir = os.listdir(data_dir)
path_for_train_dataset = os.path.join(data_dir, list_dir[1])
dataframe = pd.read_csv(path_for_train_dataset)
dataframe.head()

In [3]:
## These two names were only needed to locate the CSV file; remove them from the namespace
del path_for_train_dataset, list_dir

In [4]:
## All training images found on disk (one sub-folder per class under imgs/train)
train_file = glob(os.path.join(data_dir, "imgs/train/*/*.jpg"))

## Attach the on-disk path of every image to its row in the dataframe
dataframe["file_path"] = dataframe.apply(
    lambda x : os.path.join(data_dir, 'imgs/train', x.classname, x.img), axis = 1
)

## Class names are assumed to be one letter followed by the class number (e.g. "c3").
## Parse the whole numeric suffix: reading only x[1] would silently turn "c10" into 1.
dataframe["labels"] = dataframe["classname"].map(lambda x : int(x[1:]))
dataframe.head()

In [5]:
## Total number of rows (one row per image) in the dataframe
num_dataset = dataframe.shape[0]
print("The number of Images in the dataset :", num_dataset)

In [6]:
## With the dataframe in place, visualise how the images are distributed.
## Browsing the raw data shows several different photos of the same person,
## so start by counting the images per subject.
arr = dataframe["subject"].value_counts()

fig, ax = plt.subplots()
ax.bar(arr.index, arr.values)
ax.grid()
ax.tick_params(axis = "x", labelrotation = 90)
ax.set_xlabel("Subjects")
ax.set_ylabel("Number of images")
plt.show()

n_subjects = len(arr)
print("Number of Different Subject :", n_subjects)
print("Average Number of Images per subject :", arr.values.sum() // n_subjects)

In [7]:
## The data covers 26 different subjects,
## each with roughly 862 images.
## Next, look at how the images are spread over the labels.
arr = dataframe["labels"].value_counts()

fig, ax = plt.subplots()
ax.bar(arr.index, arr.values)
ax.grid()
ax.tick_params(axis = "x", labelrotation = 90)
ax.set_xlabel("Labels")
ax.set_ylabel("Number of images")
plt.show()

n_labels = len(arr)
print("Number of labels :", n_labels)
print("Average Number of Images per Label :", arr.values.sum() / n_labels)

In [8]:
## Shuffle the rows once so that the positional train / validation / test slices taken later
## are random samples of the data.
## A fixed seed keeps the split reproducible across kernel restarts, and drop = True stops
## reset_index from storing the old row order as a stray "index" column.
SEED = 42
dataframe = dataframe.sample(frac = 1, random_state = SEED).reset_index(drop = True)
dataframe.head()

In [9]:
## One-hot encoder for the integer class labels, configured to return a dense array.
## scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4 removed the old
## spelling, so try the new name first and fall back for older installations.
try :
    encoder = OneHotEncoder(sparse_output = False)
except TypeError :
    encoder = OneHotEncoder(sparse = False)

## Integer labels as a flat numpy array, in dataframe row order
labels = np.array(dataframe["labels"])
labels 

In [10]:
## The encoder expects a 2-D column, so reshape the flat label vector to (n_rows, 1).
## The row count is taken from the dataframe instead of being hard-coded, so the cell keeps
## working if the dataset size changes (and still fails loudly if it is re-run on the
## already-encoded matrix).
labels = encoder.fit_transform(labels.reshape((len(dataframe), 1)))
labels.shape

In [11]:
## Cut the one-hot label matrix at rows 16000 and 20000:
## rows [0, 16000) -> train, [16000, 20000) -> val, [20000, end) -> test
train, val, test = np.split(labels, [16000, 20000])

In [12]:
## Sanity check: shapes of the three label splits
print(*[split.shape for split in (train, val, test)])

In [13]:
## Slice the shuffled dataframe at the same row positions used for the label arrays
train_df = dataframe.iloc[: 16000]
val_df = dataframe.iloc[16000 : 20000]
test_df = dataframe.iloc[20000 :]

## The full dataframe and the full label matrix are not needed beyond this point
del dataframe, labels

# **Building the DataPipeline**

In [14]:
class DriverDataset(Dataset) :
    """Dataset that yields (image, label) pairs for the image paths listed in a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "file_path" column holding one image path per row.
    labels : sequence
        One label per row of `df`, in the same order; it is indexed positionally.
    transform : callable, optional
        Called as ``transform(image = img)``; the "image" entry of the result replaces the image.
    """

    def __init__(self, df, labels, transform = None) :
        super(DriverDataset, self).__init__()
        # Renumber the rows from 0. A slice such as dataframe[16000 : 20000] keeps its original
        # index labels, and looking those up with the 0-based positions a DataLoader supplies
        # would raise KeyError.
        self.img_dir = df["file_path"].reset_index(drop = True)
        self.labels = labels
        self.transform = transform

    def __len__(self) :
        """Number of samples, taken from the length of the label sequence."""
        return len(self.labels)

    def __getitem__(self, index) :
        """Load the image at position `index` as a float32 RGB array and return (image, label).

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the file (cv2.imread returns None in that case).
        """
        img_pth = self.img_dir.iloc[index]

        image_ = cv2.imread(img_pth)
        if image_ is None :
            raise FileNotFoundError("Could not read image: " + str(img_pth))

        # OpenCV loads images as BGR; convert to RGB before any further processing
        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2RGB)
        image_ = np.array(image_, dtype = np.float32)

        label = self.labels[index]

        if self.transform is not None :
            augmentations = self.transform(image = image_)
            image_ = augmentations["image"]

        return image_, label

In [15]:
def get_loaders(train_df, train_label, val_df, val_label, batch_size, train_transform, val_transform, num_workers = 2, pin_memory = True) :
    """Build the training and validation DataLoaders.

    Parameters
    ----------
    train_df, val_df : pandas.DataFrame
        Dataframes handed to DriverDataset (which reads their "file_path" column).
    train_label, val_label : sequence
        Labels matching the rows of the corresponding dataframe.
    batch_size : int
        Batch size used by both loaders.
    train_transform, val_transform : callable or None
        Transforms forwarded to the respective DriverDataset.
    num_workers : int
        Number of worker processes per loader.
    pin_memory : bool
        Forwarded to both DataLoaders.

    Returns
    -------
    (train_loader, val_loader)
    """
    trainDS = DriverDataset(train_df, train_label, train_transform)
    valDS = DriverDataset(val_df, val_label, val_transform)

    # Reshuffle the training samples every epoch so batches differ between epochs;
    # validation keeps a fixed order so its results stay comparable.
    train_loader = DataLoader(trainDS, batch_size, num_workers = num_workers, pin_memory = pin_memory, shuffle = True)
    val_loader = DataLoader(valDS, batch_size, num_workers = num_workers, pin_memory = pin_memory, shuffle = False)

    return train_loader, val_loader

# **Other Utility Functions**

In [16]:
def save_checkpoint(state, filename = "my_checkpoint.pth.tar") :
    """Print a notice, then serialise `state` to `filename` with torch.save."""
    print("=> Saving Checkpoint")
    torch.save(obj = state, f = filename)

def load_checkpoint(checkpoint, model) :
    """Print a notice, then copy the weights stored under checkpoint["state_dict"] into `model`."""
    print("=> Loading Checkpoint")
    weights = checkpoint["state_dict"]
    model.load_state_dict(weights)
    
def eval_model(model, loader = None):
    """Compute the top-1 accuracy (in percent) of `model` over a data loader.

    Parameters
    ----------
    model : torch.nn.Module
        The network to evaluate. This is the model that is actually called.
    loader : iterable of (images, labels) batches, optional
        Defaults to the notebook-level `test_loader` when omitted. Labels may be class
        indices of shape (batch,) or one-hot / score rows of shape (batch, classes).

    Returns
    -------
    float
        Accuracy in percent; 0.0 if the loader yields no samples.
    """
    if loader is None :
        # Backward-compatible default: fall back to the global loader
        loader = test_loader

    # Run on whatever device the model lives on (CPU for a parameter-free module)
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0

    # Evaluate in inference mode (fixed BatchNorm statistics, no dropout) and restore the
    # caller's mode afterwards.
    was_training = model.training
    model.eval()
    try :
        with torch.no_grad():
            for images, labels in loader :
                images = images.to(run_device)
                labels = labels.to(run_device)

                outputs = model(images)
                predicted = outputs.argmax(dim = 1)

                # One-hot labels must be reduced to class indices before comparing
                if labels.dim() > 1 :
                    labels = labels.argmax(dim = 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally :
        model.train(was_training)

    test_acc = 100.0 * correct / total if total else 0.0
    print('Accuracy of the network on the test images: %d %%' % (test_acc))
    return test_acc
            



# **Preparing the HyperParameters**

In [17]:
learning_rate = 3e-4  # learning rate passed to the Adam optimizer
device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU whenever CUDA is available
batch_size = 16  # batch size passed to get_loaders
num_epochs = 30  # number of epochs the Training loop runs
Image_height = 256  # target height of the A.Resize step in the transforms
Image_width = 256  # target width of the A.Resize step in the transforms
pin_memory = True  # forwarded to the DataLoaders through get_loaders
load_model = False  # presumably: resume from a saved checkpoint when True -- TODO confirm where it is consumed
num_workers = 2  # forwarded to the DataLoaders through get_loaders

# **Building the Model**

In [18]:
## Backbone: torchvision's ResNet-18, created with pretrained = False (no pretrained weights)

model = models.resnet18(pretrained = False)

In [19]:
## Display the model's repr to inspect its layers
model

In [20]:
## Replace the backbone's classification layer with a 10-way head (512 input features).
## The head emits raw logits: the training criterion (nn.BCEWithLogitsLoss) already applies a
## sigmoid internally, so a Sigmoid layer here would squash the outputs twice and flatten the
## gradients. argmax over logits selects the same class as argmax over sigmoid outputs.
## The nn.Sequential wrapper is kept so the parameter names stay "fc.0.weight" / "fc.0.bias"
## and previously saved checkpoints still load.
model.fc = nn.Sequential(nn.Linear(512, 10))
model = model.to(device)

In [21]:
## Image augmentation pipelines built with Albumentations

# Training pipeline: resize, random geometric augmentation, normalisation, tensor conversion
train_transform = A.Compose(
    [
        A.Resize(Image_height, Image_width),  # bring every image to the configured size
        A.Rotate(limit = 35, p = 1.0),  # always applied (p = 1.0), rotation limit 35
        A.HorizontalFlip(p = 0.5),  # applied with probability 0.5
        A.VerticalFlip(p = 0.1),  # applied with probability 0.1
        # NOTE(review): with mean 0, std 1 and max_pixel_value 255 this presumably just
        # rescales pixel values to [0, 1] -- confirm against the Albumentations docs
        A.Normalize(
            mean = [0.0, 0.0, 0.0],
            std = [1.0, 1.0, 1.0],
            max_pixel_value = 255.0
        ),
        ToTensorV2(),  
    ]
)

# Validation pipeline: same resize / normalise / tensor steps, without random augmentation
val_transforms = A.Compose(
    [
        A.Resize(Image_height, Image_width), 
        A.Normalize(
            mean = [0.0, 0.0, 0.0],
            std = [1.0, 1.0, 1.0],
            max_pixel_value = 255.0
        ),
        ToTensorV2(),
    ]
)

In [22]:
## Defining the train Function

def train_fn(loader, model, optimizer, loss_fn, scaler) :
    """Run one training pass over `loader` using automatic mixed precision.

    Parameters
    ----------
    loader : iterable of (data, targets) batches
    model : torch.nn.Module
        Network being trained; switched to training mode here.
    optimizer : torch.optim.Optimizer
    loss_fn : callable
        Called as ``loss_fn(predictions, targets)``.
    scaler : torch.cuda.amp.GradScaler
        Scales the loss for the backward pass and drives the optimizer step.

    Returns
    -------
    float
        Mean batch loss of the pass (0.0 if the loader is empty). Callers that ignore the
        return value are unaffected.

    Batches are moved to the notebook-level `device`.
    """
    # Make sure BatchNorm / dropout layers are in training mode, even if an evaluation ran before
    model.train()

    loop = tqdm(loader)
    running_loss = 0.0
    num_batches = 0

    for data, targets in loop :
        data = data.to(device)
        targets = targets.to(device)
        # Floating-point targets (e.g. one-hot rows, presumably float64 from the encoder) are
        # cast to float32 to match the model outputs; integer targets are left untouched.
        if targets.is_floating_point() :
            targets = targets.float()

        ## Forward
        with torch.cuda.amp.autocast() :
            predictions = model(data)
            loss = loss_fn(predictions, targets)

        ## Backwards
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        ## Update the tqdm loader
        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1
        loop.set_postfix(loss = batch_loss)

    return running_loss / num_batches if num_batches else 0.0

In [23]:
## Optimiser and learning-rate schedule
optimizer = O.Adam(params = model.parameters(), lr = learning_rate)
scheduler = LinearLR(optimizer, start_factor = 0.33, end_factor = 1)

## Data loaders for the training and validation splits
train_loader, val_loader = get_loaders(
    train_df, train, val_df, val, batch_size,
    train_transform = train_transform,
    val_transform = val_transforms,
    num_workers = num_workers,
    pin_memory = pin_memory,
)

## Training criterion
loss_fn = nn.BCEWithLogitsLoss()

def Training(load_model) :
    """Train the notebook-level `model` for `num_epochs` epochs, saving a checkpoint after each.

    Uses the module-level `model`, `optimizer`, `scheduler`, `train_loader`, `loss_fn`,
    `num_epochs` and `device`.

    Parameters
    ----------
    load_model : bool
        When true, resume from "my_checkpoint.pth.tar" before training starts.
    """
    if load_model :
        # map_location lets a checkpoint written on a GPU be restored in a CPU-only session
        checkpoint = torch.load("my_checkpoint.pth.tar", map_location = device)
        load_checkpoint(checkpoint, model)
        # The optimizer state is saved next to the weights; restore it too so training
        # resumes with the same optimizer statistics.
        if "optimizer" in checkpoint :
            optimizer.load_state_dict(checkpoint["optimizer"])

    scaler = torch.cuda.amp.GradScaler()

    for _ in range(num_epochs) :
        train_fn(train_loader, model, optimizer, loss_fn, scaler)
        scheduler.step()

        ## Saving the Model
        checkpoint = {
            "state_dict" : model.state_dict(),
            "optimizer" : optimizer.state_dict()
        }

        save_checkpoint(checkpoint)

        ## TODO: validation accuracy is not computed during training


In [24]:
## Honour the `load_model` switch from the hyperparameter cell (False there, so training starts
## from scratch) instead of repeating the value as a literal
Training(load_model)

In [25]:
## Predict on the held-out test split, one image at a time.
## Every image goes through the same preprocessing as validation data (val_transforms: resize,
## normalisation, conversion to a channels-first tensor), so inference matches training.
## A plain reshape to (1, 3, H, W) must not be used here: it reinterprets the
## height x width x channel buffer instead of moving the channel axis, which scrambles the image.
model.eval()  ## inference mode: BatchNorm must use its running statistics, not single-image ones

y_pred = list()
with torch.no_grad() :
    for img_pth in test_df["file_path"] :
        image_ = cv2.imread(img_pth)
        if image_ is None :
            raise FileNotFoundError("Could not read image: " + str(img_pth))
        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2RGB)
        image_ = np.array(image_, dtype = np.float32)

        image_ = val_transforms(image = image_)["image"]
        image_ = image_.unsqueeze(0).to(device)  ## add the batch dimension

        y = model(image_).squeeze(0).cpu().numpy()
        y_pred.append(y)
    

In [26]:
## Turn the list of per-image score vectors into one array and show its shape
y_pred = np.asarray(y_pred)
y_pred.shape

In [27]:
## Inspect the first entry of the predictions
y_pred[0]

In [28]:
## Shape of the test label array, for comparison with the predictions
test.shape

In [29]:
## Reduce score rows and one-hot rows to class indices (position of the largest entry per row)
y_pred = y_pred.argmax(axis = 1)
y_true = test.argmax(axis = 1)

In [30]:
## Accuracy score of the predictions on the test split
accuracy_score(y_true, y_pred)

In [31]:
## Confusion matrix of true vs. predicted class indices on the test split
confusion_matrix(y_true, y_pred)