In [None]:
import os
from os import listdir
import time

# Torch libs
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

from torch.utils.data.sampler import WeightedRandomSampler

import torchvision.transforms as T
from torchvision.utils import make_grid
from torchvision.models import resnet50
import torchvision.transforms as transforms
from torchvision.io import read_image

from sklearn.model_selection import train_test_split

# Data libs
import pandas as pd
import numpy as np
# import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
%matplotlib inline

## EDA: 
Check if the image has a dog or not<br>
If yes, count number of dogs/faces

### Read a doggo

In [None]:
# ---- Set this to your own name so file paths resolve to your experiments folder ----
YOUR_NAME = "nora"

## Neural Nets
(the OG way)

**Sample annotations file**

In [None]:
# import xml.etree.ElementTree as ET
# from pathlib import Path

# # Chihuahua
# path = '../../dogs/Annotation/n02085620-Chihuahua/n02085620_10074'

# with open(path) as annot_file:
#     print(''.join(annot_file.readlines()))

### 1. Using a model pre-trained on the `ImageNet` dataset

#### Check if CUDA is available to use

In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

#### Create `labels` from Felix's code

In [None]:
# Build the labels table by walking the per-breed image folders.
# NOTE: the relative paths used later in the notebook (image paths and
# "../../experiments/...") are resolved from this working directory.
os.chdir('../../dogs/Images')

labels = []
image_path = []
label_num = []

# One integer id per distinct breed name, assigned in order of first appearance
breed_to_num = {}

for root, dirs, files in os.walk("."):
    # Sort in place so os.walk descends into the folders in a reproducible order;
    # otherwise the label numbering depends on the filesystem's listing order.
    dirs.sort()

    # Skip the top-level folder itself and any folder that holds no files
    if root == "." or not files:
        continue

    # Folder names look like "<id>-<breed>"; keep everything after the first dash
    breed = root.split("-", 1)[1]
    breed_id = breed_to_num.setdefault(breed, len(breed_to_num))

    for name in sorted(files):
        labels.append(breed)
        image_path.append(os.path.join(root, name))
        label_num.append(breed_id)


df = pd.DataFrame({'labels':labels,'image_path':image_path, 'label_num':label_num})

# Create label dict (label number -> breed name) for later use
breeds = pd.Series(df.labels.values,index=df.label_num).to_dict()

# Save the dataframe next to the other experiment files of YOUR_NAME.
# The index column is written on purpose: later cells read image_path and
# label_num by position (columns 2 and 3 of the CSV).
df.to_csv('../../experiments/' + YOUR_NAME + '/labels.csv')
display(df.head())
display(df.tail())

In [None]:
# # Test annotations
# doggo_labels = pd.read_csv("../../experiments/"+YOUR_NAME+"/labels.csv")
# index = 400
# print("Index: ", doggo_labels.iloc[index, 0], "\nLabel: ", doggo_labels.iloc[index, 1], "\nImage directory: ", doggo_labels.iloc[index, 2], "\nLabel number: ", doggo_labels.iloc[index, 3])

# print("\nTotal number of samples: ", doggo_labels.shape[0], "\nDog breeds/ unique labels: ", len(pd.unique(doggo_labels['labels'])))

# # print("\nBreed value counts:", doggo_labels['labels'].value_counts())

# print("\nNumber images per breed \nMax: ", max(doggo_labels['labels'].value_counts()), "\nMin:", min(doggo_labels['labels'].value_counts()))

In [None]:
# Create train, validate and test splits with proportional (stratified) classes
from sklearn.model_selection import train_test_split

train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# Fixed seed so the same images land in the same split on every run
SPLIT_SEED = 42

# Load the labels table here: the only other assignment of `doggo_labels`
# lives in a commented-out cell, so this cell used to fail on a fresh kernel.
doggo_labels = pd.read_csv("../../experiments/" + YOUR_NAME + "/labels.csv")

# Column 3 of the CSV is label_num (column 0 is the saved index)
label_num = np.array(doggo_labels.iloc[:, 3])

# labels
dataY = label_num

# "features" are just row positions; the images themselves are loaded lazily by the Dataset
dataX_dummy = range(len(label_num))

# train is now 75% of the entire data set
index_train, index_test, y_train, y_test = train_test_split(
    dataX_dummy,
    dataY,
    test_size=1 - train_ratio,
    stratify=dataY,
    random_state=SPLIT_SEED,
)

# test is now 10% of the initial data set
# validation is now 15% of the initial data set
index_val, index_test, y_val, y_test = train_test_split(
    index_test,
    y_test,
    test_size=test_ratio/(test_ratio + validation_ratio),
    stratify=y_test,
    random_state=SPLIT_SEED,
)


In [None]:
# Sanity check: pick the training rows by position and show the first two image paths
img_annotations = pd.read_csv(f"../../experiments/{YOUR_NAME}/labels.csv").iloc[index_train]
display(img_annotations.head())
print(*(img_annotations.iloc[row, 2] for row in (0, 1)), sep="\n")

#### Using same Dataset class and config as Felix

#### Different ways to optimize
1. Optimal way to crop the image
2. Changing number of channels - Grayscale, RGB, CMYK
3. Normalize the tensor - either with 0s/1s or with mean/variance 

**TODO:** Some images have a channel count other than 3 (e.g. 4 channels), which will cause issues with the pre-trained ResNet50 model, since it is trained on images with 3 channels.<br>
    
_Ways to rectify:_<br>
    1. Convert channel 4 to Grayscale (2)<br>
    2. Convert Grayscale (2) to RGB (3)<br>

In [None]:
class DoggoDataset(Dataset):
    """Dataset of dog images backed by a subset of rows of ``labels.csv``.

    Each item is an ``(image, label)`` pair, where ``image`` is the RGB image
    after ``transform`` has been applied and ``label`` is the integer breed id
    stored in the CSV.

    Args:
        indices: Row positions of ``labels.csv`` that belong to this split.
        transform: Optional callable applied to the PIL image. When omitted,
            the image is centre-cropped to 120x120 and converted to a tensor.
    """

    def __init__(self, indices, transform=None):
        self.img_annotations = pd.read_csv("../../experiments/"+YOUR_NAME+"/labels.csv").iloc[indices]

        # Honour a caller-supplied transform; previously it was silently overwritten
        if transform is None:
            # Default pipeline: centre-crop to 120x120, then convert to a tensor
            transform = transforms.Compose([
                transforms.CenterCrop(120),
                transforms.ToTensor(),
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of images in this split."""
        return len(self.img_annotations)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``."""
        # CSV columns by position: 0 = saved index, 1 = labels, 2 = image_path, 3 = label_num
        img_path = self.img_annotations.iloc[idx, 2]
        label = self.img_annotations.iloc[idx, 3]

        # Close the file handle promptly; convert() returns a fully loaded copy.
        # Forcing RGB guarantees 3 channels for the pre-trained network.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

### `DataLoader()`

#### Adding an extra parameter `num_workers`

`num_workers` can be chosen based on the number of CPU cores on the EC2 instance<br>
**p2.xlarge** instance which has 4 CPU cores <br>
**g4dn.2xlarge** instance which has 8 CPU cores

Doubling the `batch_size` to 128 since we're using CUDA on a GPU instance

In [30]:
# One dataset per split, each backed by its own subset of labels.csv rows
train_set = DoggoDataset(index_train)
val_set = DoggoDataset(index_val)
test_set = DoggoDataset(index_test)

num_workers=4
batch_size=128

# Only the training data is shuffled. Evaluation loaders keep a fixed order so
# validation/test outputs are reproducible and can be matched back to samples.
train_dataloader = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_dataloader = DataLoader(val_set, batch_size=batch_size, num_workers=num_workers, shuffle=False)
test_dataloader = DataLoader(test_set, batch_size=batch_size, num_workers=num_workers, shuffle=False)

### Custom functions
to make stuff modular

#### 1. Accuracy

In [None]:
def get_accuracy(pred, true):
    """Return the percentage of predictions that match the true labels.

    Args:
        pred: Predictions as a tensor, array or sequence. Integer values are
            treated as class indices and compared to ``true`` directly.
            Floating-point values are treated as binary scores and thresholded
            at 0.5 first (the behaviour this function originally had).
        true: True labels, with the same number of elements as ``pred``.

    Returns:
        Accuracy in percent as a float in ``[0, 100]``; ``0.0`` for empty input.

    Raises:
        ValueError: If ``pred`` and ``true`` differ in number of elements.
    """
    def _to_flat_array(values):
        # Tensors may live on the GPU and/or be part of the autograd graph
        if torch.is_tensor(values):
            values = values.detach().cpu().numpy()
        return np.asarray(values).reshape(-1)

    pred_arr = _to_flat_array(pred)
    true_arr = _to_flat_array(true)

    if pred_arr.size != true_arr.size:
        raise ValueError(
            "pred and true must have the same number of elements, got {} and {}".format(
                pred_arr.size, true_arr.size
            )
        )
    if pred_arr.size == 0:
        return 0.0

    # Thresholding class indices at 0.5 collapsed every class >= 1 into "1",
    # so it is now applied to floating-point scores only.
    if np.issubdtype(pred_arr.dtype, np.floating):
        pred_arr = (pred_arr >= 0.5).astype(np.int64)

    return float(np.mean(pred_arr == true_arr) * 100)

#### 2. Train the model over one epoch

In [None]:
def train_one_epoch(train_dataloader):
    """Train the global ``model`` for one pass over ``train_dataloader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``device``, and appends the epoch results to ``train_logs``.

    Args:
        train_dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, total_time)``: mean batch loss, mean
        batch accuracy in percent, and elapsed wall-clock seconds.
    """
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()

    # Put mode-dependent layers into training mode explicitly, so this function
    # does not rely on whatever mode the model was left in by earlier code.
    model.train()

    for images, labels in train_dataloader:

        # ResNet is trained on images with only 3 channels
        if images.shape[1] != 3:
            continue

        # Load images and labels to device - in our case GPU!
        images = images.to(device)
        labels = labels.to(device).to(torch.long)

        # Resetting gradients
        optimizer.zero_grad()

        # Forward pass; the predicted class is the index of the largest output
        preds = model(images)
        _, max_pred = torch.max(preds, 1)

        # Calculating loss
        _loss = criterion(preds, labels)
        epoch_loss.append(_loss.item())

        # Calculating accuracy
        epoch_acc.append(get_accuracy(max_pred, labels))

        # Backward pass and parameter update
        _loss.backward()
        optimizer.step()

    # Overall epoch results
    total_time = time.time() - start_time

    # Acc and loss, averaged over batches
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)

    # Log the results
    train_logs["loss"].append(epoch_loss)
    train_logs["accuracy"].append(epoch_acc)
    train_logs["time"].append(total_time)

    return epoch_loss, epoch_acc, total_time

#### 3. Validate the model over one epoch

In [None]:
def val_one_epoch(val_dataloader, best_val_acc):
    """Evaluate the global ``model`` for one pass over ``val_dataloader``.

    Uses the notebook-level ``model``, ``criterion`` and ``device``, appends
    the epoch results to ``val_logs``, and saves the model weights to
    ``resnet50_best.pth`` whenever the accuracy beats ``best_val_acc``.

    Args:
        val_dataloader: Iterable yielding ``(images, labels)`` batches.
        best_val_acc: Best validation accuracy (percent) seen so far.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, total_time, best_val_acc)`` where
        ``best_val_acc`` is updated if this epoch improved on it.
    """
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()

    # Evaluate in eval mode without building the autograd graph, then restore
    # whatever mode the model was in so callers are not affected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in val_dataloader:

                # ResNet is trained on images with only 3 channels
                if images.shape[1] != 3:
                    continue

                # Load images and labels to device - again GPU!
                images = images.to(device)
                labels = labels.to(device).to(torch.long)

                # Forward pass; the predicted class is the index of the largest output
                preds = model(images)
                _, max_pred = torch.max(preds, 1)

                # Calculating loss
                _loss = criterion(preds, labels)
                epoch_loss.append(_loss.item())

                # Calculating accuracy
                epoch_acc.append(get_accuracy(max_pred, labels))
    finally:
        model.train(was_training)

    # Overall epoch results
    total_time = time.time() - start_time

    # Acc and loss, averaged over batches
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)

    # Log the results
    val_logs["loss"].append(epoch_loss)
    val_logs["accuracy"].append(epoch_acc)
    val_logs["time"].append(total_time)

    # Save the best model
    if epoch_acc > best_val_acc:
        best_val_acc = epoch_acc
        torch.save(model.state_dict(),"resnet50_best.pth")

    return epoch_loss, epoch_acc, total_time, best_val_acc
        

## `ResNet50`
Let's gooooooo

In [None]:
# Start from the pre-trained ResNet50 weights
model = resnet50(pretrained = True)
num_ftrs = model.fc.in_features

# Replace the classifier head with a 120-way linear layer that outputs raw logits.
# There is deliberately no Softmax here: the loss used for training,
# nn.CrossEntropyLoss, expects unnormalised logits and applies log-softmax
# itself, so an extra Softmax would be applied twice and flatten the gradients.
# The layer stays inside nn.Sequential so its parameters keep the "fc.0.*" names.
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 120, bias = True),
)

# Fine-tune every layer, not just the new head
for param in model.parameters():
    param.requires_grad = True

#### Model stuff

In [None]:
# Number of passes over the training data
epochs = 75

# Per-epoch history for each phase: loss, accuracy and wall-clock time
train_logs = {metric: [] for metric in ("loss", "accuracy", "time")}
val_logs = {metric: [] for metric in ("loss", "accuracy", "time")}

# Objective: cross-entropy over the class outputs
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Halve the learning rate after every 5 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

# Move the model to the selected device
model.to(device)

In [None]:
# Best validation accuracy so far; val_one_epoch hands back the updated value
best_val_acc = 0
optimizer.zero_grad()
for epoch in range(epochs):
    # Training pass
    loss, acc, _time = train_one_epoch(train_dataloader)
    print("\n".join([
        "--------------------------------",
        "\nTraining",
        f"Epoch {epoch+1}",
        f"Loss : {round(loss, 4)}",
        f"Acc : {round(acc, 4)}",
        f"Time : {round(_time, 4)}",
    ]))

    # Validation pass (also checkpoints the best model so far)
    loss, acc, _time, best_val_acc = val_one_epoch(val_dataloader, best_val_acc)
    print("\n".join([
        "\nValidating",
        f"Epoch {epoch+1}",
        f"Loss : {round(loss, 4)}",
        f"Acc : {round(acc, 4)}",
        f"Time : {round(_time, 4)}",
    ]))

    # Advance the learning-rate schedule once per epoch
    lr_scheduler.step()

# Testing Model Results

In [None]:
def test_model(test_dataloader):
    """Run the global ``model`` over every batch of ``test_dataloader``.

    Previously the ``return`` sat inside the batch loop, so only the first
    batch was ever evaluated (and a skipped first batch raised ``NameError``).
    Results are now collected across all batches.

    Args:
        test_dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(pred_mat_tensor, labels)``:
        ``pred_mat_tensor`` is a float tensor with one row per evaluated image
        and one column per model output, holding 1 at the predicted class and
        0 elsewhere; ``labels`` is the matching tensor of true class ids.
        Both are empty tensors if no batch was evaluated.
    """
    batch_pred_mats = []
    batch_labels = []

    # Evaluate in eval mode without tracking gradients, then restore the
    # previous mode so later training is not affected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in test_dataloader:

                # ResNet is trained on images with only 3 channels
                if images.shape[1] != 3:
                    continue

                # Load images and labels to device - in our case GPU!
                images = images.to(device)
                labels = labels.to(device).to(torch.long)

                # Forward pass; the predicted class is the index of the largest output
                preds = model(images)
                _, max_pred = torch.max(preds, 1)

                # One-hot encode the predictions; width follows the model output
                pred_mat = F.one_hot(max_pred, num_classes=preds.shape[1]).to(torch.float32)

                batch_pred_mats.append(pred_mat)
                batch_labels.append(labels)
    finally:
        model.train(was_training)

    if not batch_pred_mats:
        empty_preds = torch.zeros((0, 0), device=device)
        empty_labels = torch.zeros(0, dtype=torch.long, device=device)
        return empty_preds, empty_labels

    return torch.cat(batch_pred_mats), torch.cat(batch_labels)


In [None]:
# test_pred_mat_tensor, test_labels=test_model(train_dataloader)

In [None]:
# ,max_pred_label=torch.max(test_pred_mat_tensor, 1)

In [None]:
# max_pred_label

In [None]:
# test_labels==max_pred_label

In [None]:
# test_labels

### TODO:

1. Dockerize the entire codebase
2. Use Tensorflow serving to fetch the latest trained model instead of training every time
3. Compare the performance of ResNet34, VGG-16 and a few more architectures with ResNet