In [1]:
!pip3 install torch torchvision



In [2]:
!pip install imagecorruptions

Collecting imagecorruptions
  Downloading imagecorruptions-1.1.2-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: imagecorruptions
Successfully installed imagecorruptions-1.1.2


In [3]:
# Lets say N is increasing

In [4]:
import torch
import torchvision
from imgaug import augmenters as iaa

In [5]:
ls

[0m[01;34msample_data[0m/


In [6]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cs231n/assignments/assignment1/'
FOLDERNAME = 'training'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))
# sys.path.append('/content/drive/{}')

# This downloads the CIFAR-10 dataset to your Drive
# if it doesn't already exist.
# %cd /content/drive/My\ Drive/$FOLDERNAME/cs231n/datasets/
# !bash get_datasets.sh
# %cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive


In [7]:
%cd /content/drive/My\ Drive/$FOLDERNAME/
# %cd /content/drive/$FOLDERNAME/

/content/drive/My Drive/training


In [8]:
# !wget https://data.caltech.edu/records/65de6-vp158/files/CUB_200_2011.tgz

## Download Original Dataset

In [9]:
# !tar xvzf CUB_200_2011.tgz

In [10]:
# !cat CUB_200_2011/README

In [11]:
# %cd drive/$FOLDERNAME

In [12]:
# cd drive/MyDrive/CS682Project

## Import required libraries

In [13]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd
import numpy as np
import torch
from pathlib import Path
from torch.utils.data import Dataset, DataLoader, ConcatDataset

import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import random
import cv2
import matplotlib.pyplot as plt
import torchvision.transforms as transforms


In [14]:
import albumentations as A

## Helper Functions

read_image() functions reads image and converts it from BGR to RGB as opencv used BGR while matplotlib uses RGB. So, we need to make this conversion so we can easily visualize the data



In [15]:
def read_image(path):
    im = cv2.imread(str(path))
    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

In [16]:
# Data Augmentation
import math

def crop(im, r, c, target_r, target_c): return im[r:r+target_r, c:c+target_c]

def random_crop(x, target_r, target_c):
    """ Returns a random crop"""
    r,c,*_ = x.shape
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(rand_r*(r - target_r)).astype(int)
    start_c = np.floor(rand_c*(c - target_c)).astype(int)
    return crop(x, start_r, start_c, target_r, target_c)

def rotate_cv(im, deg, mode=cv2.BORDER_REFLECT, interpolation=cv2.INTER_AREA):
    """ Rotates an image by deg degrees"""
    r,c,*_ = im.shape
    M = cv2.getRotationMatrix2D((c/2,r/2),deg,1)
    return cv2.warpAffine(im,M,(c,r), borderMode=mode,
                          flags=cv2.WARP_FILL_OUTLIERS+interpolation)


In [17]:
def normalize(im):
    """Normalizes images with Imagenet stats."""
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return (im/255.0 - imagenet_stats[0])/imagenet_stats[1]

In [18]:
pwd

'/content/drive/My Drive/training'

In [19]:
PATH = Path('./dataset/CUB_200_2011')
labels = pd.read_csv(PATH/"image_class_labels.txt", header=None, sep=" ")
labels.columns = ["id", "label"]
train_test = pd.read_csv(PATH/"train_test_split.txt", header=None, sep=" ")
train_test.columns = ["id", "is_train"]
images = pd.read_csv(PATH/"images.txt", header=None, sep=" ")
images.columns = ["id", "name"]

In [20]:
# can change this
def apply_transform_crop(x, sz=(224, 224), zoom=1.05):
    """ Applies a random crop, rotation"""
    sz1 = int(zoom*sz[0])
    sz2 = int(zoom*sz[1])
    x = cv2.resize(x, (sz1, sz2))
    x = random_crop(x, sz[1], sz[0])
    return x

In [21]:
# can change this
def apply_transform_rotation(x, sz=(224, 224), zoom=1.05):
    """ Applies a random crop, rotation"""
    sz1 = int(zoom*sz[0])
    sz2 = int(zoom*sz[1])
    x = cv2.resize(x, (sz1, sz2))
    x = rotate_cv(x, np.random.uniform(-10,10))
    return x

In [22]:
def apply_transform_rgb(x, sz=(224, 224)):
    """Applies grayscale transformation."""
    #zoom in function was creating images of varying shape
    #wrapped the A.ToGray() transformation inside A.Compose([])
    transform = A.Compose([
        A.ToGray(),
        A.Resize(height=sz[0], width=sz[1])  # Ensure all images are resized to 224x224
    ])
    #passed the image x to the transform function
    augmented = transform(image=x)
    #extracted the transformed image from the dictionary returned by the transformation using the key 'image'
    x = augmented['image']
    return x


In [23]:
def apply_transform_dropout(x, sz=(224, 224)):
    """Applies dropout transformation."""
    #Sample per image a value p from the range 0<=p<=0.2 and then drop p percent of all pixels in the image (i.e. convert them to black pixels)
    transform = iaa.Sequential([
      iaa.Dropout(p=(0, 0.2)),
      iaa.Resize({"height": sz[0], "width": sz[1]})
    ])
    #passed the image x to the transform function
    augmented = transform(image=x)
    #the returned type is an nd arrays
    x = augmented
    return x

In [24]:
def apply_transform_blur(x, sz=(224, 224)):
    """Create an augmenter that always pools with a kernel size of 2 x 2"""
    transform = iaa.Sequential([
      iaa.imgcorruptlike.GaussianBlur(severity=2),
      iaa.Resize({"height": sz[0], "width": sz[1]})
    ])
    #passed the image x to the transform function
    x = transform(image=x)
    #extracted the transformed image from the dictionary returned by the transformation using the key 'image'
    return x

In [25]:
def apply_transform_sigmoid(x, sz=(224, 224)):
    """Applying the sigmoid contrast"""
    transform = iaa.Sequential([
      iaa.SigmoidContrast(gain=(3, 10), cutoff=(0.4, 0.6)),
      iaa.Resize({"height": sz[0], "width": sz[1]}),
      ])
    #passed the image x to the transform function
    augmented = transform(image=x)
    #extracted the transformed image from the dictionary returned by the transformation using the key 'image'
    x = augmented
    return x

In [26]:
class CUBCrop(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.original_file_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_crop(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        x = normalize(x)

        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [27]:
class CUBRotate(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_rotation(x)
        x = normalize(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        # we can test this code but it will only be run during the Dataloader call so we have to test it
        # new_path = self.files_path/'images_rotated'/file_name[:-4]+'_rotated.jpg'
        # cv2.imwrite(new_path, x)
        # print("writing to path " + new_path)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [28]:
class CUBRGB(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_rgb(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [29]:
class CUBDropout(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_dropout(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [30]:
class CUBImageCorrupt(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_blur(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [31]:
class CUBSigmoid(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = apply_transform_sigmoid(x)
        # After reading and transforming the image:
        if x.shape[1:] != (224, 224):
          x = cv2.resize(x, (224, 224))
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [32]:
class CUB(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True,
                 transform=False):

        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name

        if train:
          mask = self.train_test.is_train.values == 1

        else:
          mask = self.train_test.is_train.values == 0


        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]



    def __len__(self):
        return self.num_files

    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1

        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        x = cv2.resize(x, (224,224))
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W)
        return x,y

In [33]:
train_dataset = CUB(PATH, labels, train_test, images, train= True, transform= True)
baseline_train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
# small_train_dataset = torch.utils.data.Subset(train_dataset, range(0, 500))
# small_train_loader = DataLoader(small_train_dataset, batch_size=64, shuffle=True, num_workers=2)

In [34]:
val_dataset = CUB(PATH, labels, train_test, images, train= False, transform= True)
baseline_val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True, num_workers=2)
# small_val_dataset = torch.utils.data.Subset(val_dataset, range(0, 500))
# small_val_loader = DataLoader(small_val_dataset, batch_size=64, shuffle=True, num_workers=2)

In [35]:
train_dataset_cropped = CUBCrop(PATH, labels, train_test, images, train= False, transform= True)
train_dataset_rotated = CUBRotate(PATH, labels, train_test, images, train= False, transform= True)
train_dataset_gray = CUBRGB(PATH, labels, train_test, images, train= False, transform= True)
train_dataset_sigmoid = CUBSigmoid(PATH, labels, train_test, images, train= False, transform= True)
train_dataset_dropout = CUBDropout(PATH, labels, train_test, images, train= False, transform= True)
train_dataset_imagecorrupt = CUBImageCorrupt(PATH, labels, train_test, images, train= False, transform= True)

In [36]:
# we can print the type here

In [37]:
concat_train_dataset = ConcatDataset((train_dataset, train_dataset_cropped, train_dataset_rotated, train_dataset_gray, train_dataset_sigmoid, train_dataset_dropout, train_dataset_imagecorrupt))
augmented_train_loader = DataLoader(concat_train_dataset, batch_size=64, shuffle=True, num_workers=2)

In [38]:
print(len(train_dataset),len(concat_train_dataset))

5994 40758


In [39]:
pwd

'/content/drive/My Drive/training'

In [40]:
# Function for calculating metrics
def metrics(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device).float(), labels.to(device).long()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, pred = torch.max(outputs, 1)
            correct += (pred == labels).sum().item()
            total += labels.size(0)
            sum_loss += loss.item() * labels.size(0)
    return sum_loss / total, correct / total

In [41]:
# Check for available CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [42]:
# Initialize lists to store metrics
train_losses = []
val_losses = []
train_accuracy = []
val_accuracy = []

# Model Architecture
resnet50 = models.resnet50(pretrained=True).to(device)

# Freeze layers
for param in resnet50.parameters():
    param.requires_grad = False

num_of_classes = len(np.unique(train_dataset.labels['label']))

# Modify last layer
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_ftrs, num_of_classes).to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet50.fc.parameters(), lr=0.001, momentum=0.9)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 88.6MB/s]


In [43]:
# from torch.utils.data import RandomSampler
#   small_sampler = RandomSampler(train_dataset, replacement=True, num_samples=10)
#   small_train_loader = DataLoader(train_dataset, batch_size=64, sampler=small_sampler, num_workers=2)
#   small_val_loader  = DataLoader(val_dataset, batch_size=64, sampler=small_sampler, num_workers=2)


Define Model(ResNet50).

In [None]:
# Training Loop
for epoch in range(10):
  resnet50.train()
  for inputs, labels in augmented_train_loader:
    inputs, labels = inputs.to(device).float(), labels.to(device).long()
    optimizer.zero_grad()
    outputs = resnet50(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

  # Training metrics
  train_loss, train_acc = metrics(resnet50, augmented_train_loader)
  train_losses.append(train_loss)
  train_accuracy.append(train_acc)

  # Validation metrics
  val_loss, val_acc = metrics(resnet50, baseline_val_loader)
  val_losses.append(val_loss)
  val_accuracy.append(val_acc)

  print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Train Acc: {train_acc}, Val Loss: {val_loss}, Val Acc: {val_acc}')

# Save the model
torch.save(resnet50.state_dict(), './model_weights/resnet50_cub_baseline_7n.pth')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichannel=True)
  x = gaussian(np.array(x) / 255., sigma=c, multichan

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-44-70e21d22410d>", line 4, in <cell line: 2>
    for inputs, labels in augmented_train_loader:
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 630, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1345, in _next_data
    return self._process_data(data)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1371, in _process_data
    data.reraise()
  File "/usr/local/lib/python3.10/dist-packages/torch/_utils.py", line 694, in reraise
    raise exception
cv2.error: Caught error in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker

In [None]:
# Plotting
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.title("Training and Validation Loss: Concat (7n) Dataset")
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.title("Training and Validation Accuracy:Concat (4n) Dataset")
plt.plot(train_accuracy, label="Training Accuracy")
plt.plot(val_accuracy, label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [None]:
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    #with torch.no_grad():
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)

        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [None]:
def calc_accuracy(model, dataloader):
    model.eval()
    model.cuda()

    top1 = AverageMeter()
    top5 = AverageMeter()

    for idx, (inputs, labels) in enumerate(dataloader):

        inputs, labels = inputs.cuda().float(), labels.cuda().long()
        # obtain the outputs from the model
        outputs = model.forward(inputs)
        prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

    return top1 ,top5

In [None]:
# Initialize the model architecture
resnet50 = models.resnet50(pretrained=False)
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_ftrs, num_of_classes)  # Replace with your number of classes

# Load the saved state dictionary
resnet50.load_state_dict(torch.load('./model_weights/resnet50_cub_baseline_4n.pth'))

# Move to appropriate device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet50 = resnet50.to(device)

# Set to evaluation mode
resnet50.eval()
# Run inference

top1 ,top5 = calc_accuracy(resnet50, baseline_val_loader)
print("top1 avg", top1.avg)
print("top5 avg",top5.avg)


In [None]:
from sklearn.metrics import classification_report
import numpy as np

# # Switch model to evaluation mode
# resnet50.eval()

# Store all predictions and true labels
all_preds = []
all_labels = []

# Loop through validation data
with torch.no_grad(): # No need to compute gradients when evaluating
    for inputs, labels in baseline_val_loader:
        inputs, labels = inputs.to(device).float(), labels.to(device).long()

        # Forward pass
        outputs = resnet50(inputs)

        # Get the predicted class labels
        _, preds = torch.max(outputs, 1)

        # Append to lists
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert lists to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Generate classification report
report = classification_report(all_labels, all_preds)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(all_labels, all_preds)



plt.figure(figsize=(100, 100))  # You may want to adjust this based on how many classes you have
sns.heatmap(cm, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.savefig('augmented_4n_confusion_matrix.png')
plt.show()


In [None]:
#t-sne map for feature space visualization of validation set
from sklearn.manifold import TSNE

embeddings = []
labels_list = []

def extract_embeddings(model, loader):
    with torch.no_grad():
        for inputs, labels in loader:
            # inputs = inputs.to(device)
            inputs = inputs.to(device).float()


            # Forward pass to get the embeddings before the final layer
            x = model.conv1(inputs)
            x = model.bn1(x)
            x = model.relu(x)
            x = model.maxpool(x)
            x = model.layer1(x)
            x = model.layer2(x)
            x = model.layer3(x)
            x = model.layer4(x)
            x = model.avgpool(x)
            embedding = torch.flatten(x, 1)

            embeddings.append(embedding.cpu().numpy())
            labels_list.append(labels.cpu().numpy())
    return np.vstack(embeddings), np.concatenate(labels_list)

embeddings, labels = extract_embeddings(resnet50, baseline_val_loader)


tsne = TSNE(n_components=2, random_state=42)
reduced_embeddings = tsne.fit_transform(embeddings)

In [None]:
plt.figure(figsize=(10,10))
scatter = plt.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1], c=labels, cmap='viridis', s=5)
legend = plt.legend(*scatter.legend_elements(), loc="upper right", title="Classes")
# plt.add_artist(legend)
plt.title("t-SNE visualization of feature embeddings")
plt.show()

In [None]:
#top confused pairs -10
def get_top_confused_classes(confusion_matrix, num_pairs=10):
    # Get indices of non-diagonal values (i.e., exclude true positives)
    rows, cols = np.where(np.triu(confusion_matrix, 1) > 0)

    # Get the values at these indices
    confusions = confusion_matrix[rows, cols]

    # Sort them in descending order
    sorted_indices = np.argsort(confusions)[::-1]

    # Get the top confused pairs and their values
    top_rows = rows[sorted_indices][:num_pairs]
    top_cols = cols[sorted_indices][:num_pairs]
    top_values = confusions[sorted_indices][:num_pairs]

    return top_rows, top_cols, top_values

top_rows, top_cols, top_values = get_top_confused_classes(cm, num_pairs=10)
top_confused_pairs = list(zip(top_rows, top_cols, top_values))
top_confused_pairs
#class x, class y, number of times model was confused
# we can conduct visual similarity check, data distribution for class imbalance, sample review, aggresive regularization/augmentation.

Validation Metric