# In [None]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import torch
import torchvision
import torchvision.transforms as transforms
import tarfile
import pandas as pd
import os
import re
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from io import StringIO
from PIL import Image
import re
from sklearn.metrics import accuracy_score, f1_score, precision_score
import pickle
import torchvision.models as models

# Mount Google Drive so the dataset and the pickled weights under
# '/content/drive' are readable (Colab-only dependency).
from google.colab import drive
drive.mount('/content/drive')

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

class DatasetClass(Dataset):
    """Image-folder dataset restricted to a fixed list of class directories.

    Every class is a sub-directory of ``directory`` and every entry found in a
    class directory is treated as one image of that class.  The integer label
    of a sample is the position of its class name in ``self.classes``.

    Args:
        directory: Root folder that contains one sub-directory per class.
        img_size: ``(width, height)`` passed to ``PIL.Image.resize``.
        classes: Optional sequence of class directory names.  Defaults to
            ``DEFAULT_CLASSES`` (the seven bird classes used originally).

    Attributes:
        files: Relative paths ``'<class_name>/<file_name>'`` of all samples.
        size: Number of samples, also returned by ``len(dataset)``.
    """

    DEFAULT_CLASSES = (
        '026.Bronzed_Cowbird',
        '084.Red_legged_Kittiwake',
        '131.Vesper_Sparrow',
        '085.Horned_Lark',
        '015.Lazuli_Bunting',
        '041.Scissor_tailed_Flycatcher',
        '114.Black_throated_Sparrow',
    )

    def __init__(self, directory, img_size, classes=None):
        self.directory = directory
        self.classes = list(self.DEFAULT_CLASSES if classes is None else classes)
        print('Number of Classes =', len(self.classes))

        self.files = []
        for class_name in self.classes:
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping stable between runs, so a seeded split
            # selects the same images every time.
            images = sorted(os.listdir(os.path.join(directory, class_name)))
            self.files.extend(class_name + '/' + image for image in images)

        self.img_size = img_size
        self.size = len(self.files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the sample at position ``idx``."""
        image_name = self.files[idx]
        # The class directory is the first path component of the relative name.
        y = self.classes.index(image_name.split('/', 1)[0])

        # The context manager closes the underlying file once the pixel data
        # has been copied by convert()/resize().
        with Image.open(os.path.join(self.directory, image_name)) as raw:
            img = raw.convert(mode='RGB').resize(self.img_size)

        # ToTensor turns the PIL image into a (C, H, W) float tensor in [0, 1].
        return transforms.ToTensor()(img), y

    def __len__(self):
        """Return the number of samples found at construction time."""
        return self.size

def train_test_loader(directory, img_size, train_fraction=0.7, cv_fraction=0.2, num_workers=0, batch_size=32, seed=None):
    """Build train / validation / test loaders over a random split of the data.

    Args:
        directory: Root image folder handed to ``DatasetClass``.
        img_size: ``(width, height)`` every image is resized to.
        train_fraction: Fraction of the samples used for training.
        cv_fraction: Fraction of the samples used for validation.  Whatever
            remains after the train and validation parts is the test split.
        num_workers: Worker processes used by each ``DataLoader``.
        batch_size: Batch size used by each ``DataLoader``.
        seed: Optional integer seed for the split.  ``None`` (default) uses
            the global torch RNG.

    Returns:
        ``(trainloader, cvloader, testloader, train_size, cv_size, test_size)``.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to more
            than 1 (which would give a negative test split).
    """
    # Validate before touching the disk; a tiny tolerance absorbs float noise
    # such as 0.7 + 0.3.
    if train_fraction < 0 or cv_fraction < 0 or train_fraction + cv_fraction > 1 + 1e-9:
        raise ValueError(
            'train_fraction and cv_fraction must be non-negative and sum to at most 1, '
            'got {} and {}'.format(train_fraction, cv_fraction))

    dataset = DatasetClass(directory, img_size)

    N = dataset.size
    train_size = int(N*train_fraction)
    cv_size = int(N*cv_fraction)
    test_size = N - train_size - cv_size
    if test_size < 0:
        raise ValueError('split sizes exceed the dataset size ({} samples)'.format(N))

    # Only pass a generator when a seed was requested, so the default call
    # behaves exactly as before.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_data, cv_data, test_data = torch.utils.data.random_split(
        dataset, [train_size, cv_size, test_size], **split_kwargs)

    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    # Evaluation splits do not need shuffling; a fixed order keeps per-sample
    # outputs comparable between runs.
    cvloader = DataLoader(cv_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    testloader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return trainloader, cvloader, testloader, train_size, cv_size, test_size

trainloader, cvloader, testloader, train_size, cv_size, test_size = train_test_loader('/content/drive/My Drive/Assignment3_Data/CUB_200_2011/images', (224, 224), batch_size=32)

# Per-channel mean pixel value over the training split, accumulated batch by
# batch.  Each batch X is (batch, 3, H, W) as produced by DatasetClass.
RGB_mean = torch.zeros(3)
i = 0
for X, y in trainloader:
    i += 1
    # Sum over batch, height and width, then divide by the pixel count H*W
    # (the original used H*H, which is only correct for square images) and by
    # the number of training images.
    RGB_mean += (X.sum(dim=(0, 2, 3))/(X.shape[2]*X.shape[3]))/train_size
    print(i, '/', len(trainloader), end=', ')

class VGGNet(nn.Module):
    """VGG-16 style classifier: 13 3x3 convolutions in five pooled blocks,
    followed by two 4096-unit fully connected layers and an output layer.

    ``fc1`` expects ``7*7*512`` features, i.e. 224x224 inputs (five stride-2
    poolings reduce 224 to 7).

    Args:
        RGB_mean: Tensor with one mean value per input channel; it is
            subtracted from every input in ``forward``.
        num_classes: Number of output logits.
    """

    def __init__(self, RGB_mean, num_classes):
        super().__init__()

        # Register the mean as a buffer so it follows the module through
        # .to()/.cuda()/.cpu() instead of being pinned to a global device at
        # construction time.  persistent=False keeps state_dict() limited to
        # the layer parameters, exactly as before.
        self.register_buffer('RGB_mean', RGB_mean.detach().clone(), persistent=False)
        self.num_classes = num_classes

        # Block 1: 3 -> 64 channels
        self.c11 = nn.Conv2d(3, 64, 3, stride=1, padding=1)
        self.c12 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.p1 = nn.MaxPool2d(2, stride=2)

        # Block 2: 64 -> 128 channels
        self.c21 = nn.Conv2d(64, 128, 3, stride=1, padding=1)
        self.c22 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
        self.p2 = nn.MaxPool2d(2, stride=2)

        # Block 3: 128 -> 256 channels
        self.c31 = nn.Conv2d(128, 256, 3, stride=1, padding=1)
        self.c32 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
        self.c33 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
        self.p3 = nn.MaxPool2d(2, stride=2)

        # Block 4: 256 -> 512 channels
        self.c41 = nn.Conv2d(256, 512, 3, stride=1, padding=1)
        self.c42 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
        self.c43 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
        self.p4 = nn.MaxPool2d(2, stride=2)

        # Block 5: 512 -> 512 channels
        self.c51 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
        self.c52 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
        self.c53 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
        self.p5 = nn.MaxPool2d(2, stride=2)

        # Classifier head
        self.flat = nn.Flatten(1, -1)
        self.fc1 = nn.Linear(7*7*512, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.out = nn.Linear(4096, self.num_classes)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch of images.

        Args:
            x: Input of shape ``(batch, channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; no softmax is applied.
        """
        # Centre every channel; the mean broadcasts over batch, height, width.
        x = x - self.RGB_mean[None, :, None, None]

        x = self.p1(F.relu(self.c12(F.relu(self.c11(x)))))
        # Block 2 uses its own pooling layer (p1 was reused here before).
        x = self.p2(F.relu(self.c22(F.relu(self.c21(x)))))
        x = self.p3(F.relu(self.c33(F.relu(self.c32(F.relu(self.c31(x)))))))
        x = self.p4(F.relu(self.c43(F.relu(self.c42(F.relu(self.c41(x)))))))
        x = self.p5(F.relu(self.c53(F.relu(self.c52(F.relu(self.c51(x)))))))

        x = F.relu(self.fc2(F.relu(self.fc1(self.flat(x)))))
        Z = self.out(x)

        return Z
    
VGG_model = VGGNet(RGB_mean, 7)
criterion = nn.CrossEntropyLoss()

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load this file from a trusted location.
with open('/content/drive/My Drive/vgg_init.sav', 'rb') as weights_file:
    vgg16 = pickle.load(weights_file)
params = list(vgg16.parameters())

# Layers initialised from the pickled model, in the order their
# (weight, bias) pairs appear in `params`.  The final `out` layer keeps its
# random initialisation.
pretrained_layers = [
    VGG_model.c11, VGG_model.c12,
    VGG_model.c21, VGG_model.c22,
    VGG_model.c31, VGG_model.c32, VGG_model.c33,
    VGG_model.c41, VGG_model.c42, VGG_model.c43,
    VGG_model.c51, VGG_model.c52, VGG_model.c53,
    VGG_model.fc1, VGG_model.fc2,
]

# Copy the values in place instead of rebinding `.weight` / `.bias`.
# Rebinding replaces the Parameter objects, so an optimizer built from
# VGG_model.parameters() beforehand would keep updating the discarded tensors
# and leave every transferred layer frozen; it would also make VGG_model share
# its Parameters with `vgg16`.
with torch.no_grad():
    for layer_idx, layer in enumerate(pretrained_layers):
        pairs = ((layer.weight, params[2*layer_idx]), (layer.bias, params[2*layer_idx + 1]))
        for target, source in pairs:
            if target.shape != source.shape:
                raise ValueError(
                    'pretrained parameter shape {} does not match layer shape {}'.format(
                        tuple(source.shape), tuple(target.shape)))
            target.copy_(source)

VGG_model = VGG_model.to(device)

# Build the optimizer last so it references the parameters that are actually
# part of the model and live on the training device.
optimizer = optim.SGD(VGG_model.parameters(), lr=0.001, momentum=0.9)
# Train until the epoch loss is both small (< 0.05) and stable (relative
# change < 1%), or until max_epoch epochs have run.
old_loss = np.inf

max_epoch = 100
losses = []
VGG_model.train()
for epoch in range(max_epoch):

    # Sample-weighted mean of the batch losses over this epoch.
    running_loss = 0.0

    for data in trainloader:

        X, y = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        # Forward
        y_hat = VGG_model(X)

        # Calculate Loss (Cross Entropy)
        loss = criterion(y_hat, y)

        # Backpropagation
        loss.backward()

        # Update Parameters
        optimizer.step()

        running_loss += loss.item()*len(X)/train_size

    # Relative change against the previous epoch.  A loss of exactly zero
    # would divide by zero, so it is treated as "no change".
    if running_loss != 0:
        rel_change = abs(running_loss-old_loss)/running_loss
    else:
        rel_change = 0.0

    print('Epoch', epoch+1, ': Loss =', running_loss, rel_change)
    losses.append(running_loss)

    if rel_change < 1e-2 and running_loss<0.05:
        print('Converged')
        break

    old_loss = running_loss

print('Finished Training')
# One point per completed epoch.
plt.plot(losses)
plt.ylabel('Loss')
plt.xlabel('Iter Number')
plt.title('Convergence monitor plot')
plt.show()

# Evaluate the trained model on the training split: sample-weighted mean loss
# plus the true and predicted labels used for the metrics.
VGG_model.eval()
with torch.no_grad():

    # Start from a tensor so `.item()` below also works for an empty loader.
    train_loss = torch.zeros((), device=device)
    y_train = []
    y_train_pred = []

    for data in trainloader:

        X, y = data[0].to(device), data[1].to(device)
        y_hat = VGG_model(X)
        train_loss += criterion(y_hat, y)*len(X)/train_size

        y_train.extend(y.cpu().tolist())
        # The predicted class is the index of the largest logit.
        y_train_pred.extend(torch.argmax(y_hat, dim=1).cpu().tolist())

print('Train Loss =', train_loss.item())
# scikit-learn convention: rows are true labels, columns are predictions.
# Printed explicitly; a bare expression is only displayed when it is the last
# statement of a notebook cell.
train_confusion = pd.DataFrame(confusion_matrix(y_train, y_train_pred))
print(train_confusion)

''' The confusion matrix gives insight into how well the classification model is performing.

7x7 because there are 7 classes (0-6)
The numbers in the diagonal represent the number of correct predictions for each class. 
There are 43 instances of class 0 correctly classified as class 0,
36 instances of class 1 correctly classified as class 1, and so on.
Off-diagonal elements represent misclassifications. 
For example, there are no instances of class 0 mistakenly classified as class 1, class 2, class 3, etc. 
Hence, all the values in the first row except for the diagonal element are 0.
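The sum of each row is the total number of instances whose actual class is that row's label,
while the sum of each column is the total number of instances predicted as that column's label
(scikit-learn's confusion_matrix puts true labels on the rows and predicted labels on the columns).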

0	1	2	3	4	5	6
0	43	0	0	0	0	0	0
1	0	36	0	0	0	0	0
2	0	0	46	0	0	0	0
3	0	0	0	37	0	0	0
4	0	0	0	0	40	0	0
5	0	0	0	0	0	42	0
6	0	0	0	0	0	0	43
'''
# Summary metrics on the training split; precision and F1 are averaged over
# the classes, weighted by each class's number of true samples.
acc_tr = accuracy_score(y_train, y_train_pred)
prec_tr, f1_tr = (
    metric(y_train, y_train_pred, average='weighted')
    for metric in (precision_score, f1_score)
)

print('Train Accuracy =', acc_tr, 'Train Precision =', prec_tr, 'Train F1 =', f1_tr)