# Setup Environment

Import required packages:

In [None]:
import copy, os, time, shutil
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
import itertools
import seaborn as sn
import pandas as pd
import cv2
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import torch
import torch.nn as nn 
import torchvision
from torchvision import datasets, models, transforms
from torchvision.io import read_image
from torch.utils.data import sampler, Dataset

Make PyTorch use the GPU:

In [None]:
# This notebook runs on the GPU only. Fail fast with an explicit, descriptive
# error instead of a bare `assert`, which carries no message and is stripped
# when Python runs with -O.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available: this notebook requires a GPU.")
device = torch.device('cuda')

Global variables and settings:

In [None]:
# ---- Filesystem layout ----
path_dir_project = "/thecube/students/jravagli"
path_dir_output = os.path.join(path_dir_project, "outputs", "resnet50-ft-final")
path_dir_datasets = os.path.join(path_dir_project, "datasets", "used-pp")

# Test split: image folder and its annotation file
path_test_dataset = os.path.join(path_dir_datasets, "test")
path_test_ann = os.path.join(path_dir_datasets, "test.txt")

# Checkpoint to evaluate
path_models_folder = os.path.join(path_dir_output, "models")
path_model = os.path.join(path_models_folder, "resnet50-ft-best.pth")

# ---- Model selection ----
# "vgg16" or "resnet50"; any other value (e.g. "resnet50-embed") selects the
# ResNet50 variant with the embedding input.
model_type = "resnet50"

# ---- Classes ----
# Event class names, positioned so that a class index maps to its name
orig_class_names = [
    "concert",
    "graduation",
    "meeting",
    "mountain-trip",
    "picnic",
    "sea-holiday",
    "ski-holiday",
    "wedding",
    "conference",
    "exhibition",
    "fashion",
    "protest",
    "sport",
    "theater-dance",
]
n_classes = 14
n_clothes_classes = 13

# ---- Input geometry and batching ----
img_height, img_width = 150, 150
batch_size = 64

# ---- Parameters of the ResNet50 with embedding ----
n_embedding_feats = 10
n_dense_units = 512

# Data Loading

In [None]:
# Preprocessing parameters
# NOTE(review): these look like the usual ImageNet per-channel statistics
# used with torchvision's pretrained models - confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
size = [img_height, img_width]

# Resize every image to a fixed (height, width), then normalise each channel
resize_step = transforms.Resize(size)
normalize_step = transforms.Normalize(mean, std=std)
data_transform = transforms.Compose([resize_step, normalize_step])

Define the custom dataset that reads its samples from a CSV annotation file:

In [None]:
class UsedDataset(Dataset):
    """Image dataset described by an annotation file read with ``pandas.read_csv``.

    Rows are accessed by column position: column 0 holds the image path,
    column 1 the class label and column 2 the clothes entry.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file: path (or file-like object) passed to ``pandas.read_csv``.
            transform: optional callable applied to the image tensor.
        """
        # NOTE(review): read_csv runs with its defaults, so the first line of
        # the file is consumed as a header - confirm the file has one.
        self.np_data = pd.read_csv(csv_file).to_numpy()
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.np_data)

    def __getitem__(self, idx):
        """Return one sample as a dict with keys image, label, clothes, path."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        path_image = self.np_data[idx, 0]
        # Always decode to 3-channel RGB: with the default mode a grayscale or
        # RGBA file yields 1 or 4 channels, which breaks a 3-channel
        # normalisation and the stacking of samples into a batch.
        # Dividing by 255 scales the decoded pixel values to [0, 1].
        image = read_image(path_image, mode=torchvision.io.ImageReadMode.RGB) / 255.
        label = self.np_data[idx, 1]
        clothes = self.np_data[idx, 2]

        if self.transform is not None:
            image = self.transform(image)

        return {"image": image, "label": label, "clothes": clothes, "path": path_image}

In [None]:
# Test split wrapped with the preprocessing pipeline
dataset = UsedDataset(path_test_ann, transform=data_transform)
dataset_size = len(dataset)

# shuffle=False keeps the samples in annotation-file order
loader_options = dict(batch_size=batch_size, shuffle=False, num_workers=4)
dataloader = torch.utils.data.DataLoader(dataset, **loader_options)

# Model

Define the custom ResNet50 model with the embedding layer:

In [None]:
# Pass-through layer, used to neutralise an existing layer of a pretrained network
class Identity(nn.Module):
    """Module without parameters whose forward pass returns its input unchanged."""

    def forward(self, x):
        """Return ``x`` exactly as received."""
        return x

class TwoHeadsResNet(nn.Module):
    """ResNet50 image branch combined with an embedding of a second input.

    The backbone output goes through ``fc_1`` -> ReLU -> dropout, is
    concatenated with the embedding of ``additional_data`` and mapped to
    ``n_classes`` scores by ``fc_2``.
    """

    def __init__(self):
        super().__init__()

        # NOTE(review): pretrained=True fetches the ImageNet weights even
        # though this notebook later overwrites every parameter through
        # load_state_dict - confirm whether the download is needed here.
        backbone = models.resnet50(pretrained=True)
        # Drop the original classification layer so the backbone emits features
        backbone.fc = Identity()
        self.resnet = backbone

        # Submodule names determine the state_dict keys; keep them as they are.
        self.flatten = nn.Flatten()
        self.embed = nn.Embedding(n_clothes_classes, n_embedding_feats)
        self.fc_1 = nn.Linear(2048, n_dense_units)
        self.relu = nn.ReLU()
        self.do = nn.Dropout(p=0.5)
        self.fc_2 = nn.Linear(n_dense_units + n_embedding_feats, n_classes)

    def forward(self, image, additional_data):
        """Return the class scores for a batch of images and embedding indices.

        Args:
            image: batch of images fed to the ResNet50 backbone.
            additional_data: indices looked up in the embedding table
                (presumably the clothes class of each sample - confirm).
        """
        image_feats = self.flatten(self.resnet(image))
        image_feats = self.do(self.relu(self.fc_1(image_feats)))
        embedded = self.embed(additional_data)
        # Fuse both branches along the feature dimension before classifying
        fused = torch.cat((image_feats, embedded), dim=1)
        return self.fc_2(fused)

Functions to build the different types of models used in the experiments:

In [None]:
def build_vgg16():
    """Return a VGG16 whose classifier is replaced by a three-layer dense head.

    The head is Linear -> ReLU -> Dropout, twice with 512 units, followed by
    a final Linear layer producing ``n_classes`` outputs.
    """
    # NOTE(review): pretrained=True downloads the ImageNet weights, whereas
    # build_resnet50 uses pretrained=False - confirm which one is intended
    # for an evaluation run that loads a checkpoint afterwards.
    model = models.vgg16(pretrained=True)

    hidden_units = 512
    # Input width of the head = input width of the stock classifier's first layer
    head_inputs = model.classifier[0].in_features
    model.classifier = nn.Sequential(
        nn.Linear(head_inputs, hidden_units),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(hidden_units, n_classes),
    )

    return model

def build_resnet50():
    """Return a randomly initialised ResNet50 carrying the custom head modules.

    NOTE(review): ``relu``, ``do`` and ``fc_2`` are only attached as
    attributes; nothing here changes the network's forward pass. If the stock
    torchvision forward ends at ``fc`` (confirm), the model emits the
    512-dimensional ``fc`` output and ``fc_2`` is never applied. Assigning
    ``relu`` also presumably replaces an activation attribute the backbone
    already uses - confirm. Do not "fix" this in isolation: the attribute
    names determine the state_dict keys of existing checkpoints, and the
    evaluation must mirror whatever the training code did.
    """
    hidden_units = 512
    model = models.resnet50(pretrained=False)

    # Swap the stock classification layer for the custom head modules
    backbone_feats = model.fc.in_features
    model.fc = nn.Linear(backbone_feats, hidden_units)
    model.relu = nn.ReLU()
    model.do = nn.Dropout(p=0.5)
    model.fc_2 = nn.Linear(hidden_units, n_classes)

    return model

def build_resnet50_embed():
    """Return a new ``TwoHeadsResNet`` instance."""
    embed_model = TwoHeadsResNet()
    return embed_model

Build the desired model:

In [None]:
# Builders selected by name; any other model_type falls back to the
# ResNet50 variant with the embedding input.
named_builders = {"vgg16": build_vgg16, "resnet50": build_resnet50}
build_model = named_builders.get(model_type, build_resnet50_embed)
model = build_model()

Load the trained weights:

In [None]:
# map_location remaps the stored tensors onto the device used here, so the
# checkpoint loads even if it was saved from a different device (for example
# another GPU index) than the one available on this machine.
state_dict = torch.load(path_model, map_location=device)
model.load_state_dict(state_dict)
# Inference mode: disables dropout and uses the stored batch-norm statistics
model.eval()
model.to(device)

# Evaluation

Evaluate the model on the test set:

In [None]:
# Per-sample records, filled batch by batch in the order the loader yields them
all_paths = np.empty(dataset_size, dtype=object)
all_preds = torch.zeros(dataset_size, dtype=torch.int).to(device)
all_labels = torch.zeros(dataset_size, dtype=torch.int).to(device)

# Tensor accumulator: `.double()` below stays valid even if the loader yields
# no batch at all (a plain Python int has no such method).
n_correct = torch.zeros((), dtype=torch.long, device=device)

# Only the embedding model takes the clothes input
needs_clothes = model_type == "resnet50-embed"

start = 0
with torch.no_grad():
    for batch_data in tqdm(dataloader):
        images = batch_data["image"].to(device)
        labels = batch_data["label"].to(device)
        paths = batch_data["path"]

        if needs_clothes:
            clothes = batch_data["clothes"].to(device)
            outputs = model(images, clothes)
        else:
            outputs = model(images)
        # Predicted class = index of the highest score
        _, preds = torch.max(outputs, 1)

        n_correct += torch.sum(preds == labels)

        # Save predictions and labels for further analysis. The write window
        # advances by the actual batch length, so it stays correct for a
        # short final batch or any batch size the loader produces.
        end = start + preds.shape[0]
        all_paths[start:end] = paths
        all_preds[start:end] = preds
        all_labels[start:end] = labels
        start = end

acc = n_correct.double() / dataset_size

## Quantitative Evaluation

In [None]:
# Accuracy = correctly classified test samples / total number of test samples
print(f"Model accuracy: {acc}")

Plot the confusion matrix:

In [None]:
# Bring ground truth and predictions back to the host as NumPy arrays
y_true = all_labels.cpu().numpy()
y_pred = all_preds.cpu().numpy()
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(20, 20))
# Font sizes for the axes title and the tick labels
for rc_group, rc_settings in (
    ("axes", {"titlesize": 18}),
    ("xtick", {"labelsize": 16}),
    ("ytick", {"labelsize": 16}),
):
    plt.rc(rc_group, **rc_settings)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=orig_class_names)
disp.plot(xticks_rotation="vertical", ax=ax)

Plot the normalized confusion matrix:

In [None]:
# normalize="true" divides each row by the number of samples of that true class
cm = confusion_matrix(
    all_labels.cpu().numpy(),
    all_preds.cpu().numpy(),
    normalize="true",
)

fig, ax = plt.subplots(figsize=(20, 20))
# Font sizes for the axes title and the tick labels
font_settings = [("axes", "titlesize", 18), ("xtick", "labelsize", 16), ("ytick", "labelsize", 16)]
for rc_group, rc_key, rc_value in font_settings:
    plt.rc(rc_group, **{rc_key: rc_value})
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=orig_class_names)
disp.plot(xticks_rotation="vertical", ax=ax)

## Qualitative Evaluation

Create a dataframe with the prediction data for easier analysis:

In [None]:
# One row per test sample: image path, predicted class, true class.
# The column order matters: later cells index the rows by position.
prediction_columns = {
    "path": all_paths,
    "pred": all_preds.cpu().numpy(),
    "ground_truth": all_labels.cpu().numpy(),
}
df = pd.DataFrame(prediction_columns)
df.head()

Filter rows with wrong predictions:

In [None]:
# Keep only the rows where the prediction differs from the ground truth
is_wrong = df["pred"] != df["ground_truth"]
df_errors = df[is_wrong]
df_errors.head()

Show some misclassified images for a chosen ground-truth class:

In [None]:
# Ground-truth class to inspect, as an index into orig_class_names
gt_class = 9

# Misclassified samples whose true class is gt_class, as rows of
# (path, pred, ground_truth)
has_gt_class = df_errors["ground_truth"] == gt_class
df_filter = df_errors[has_gt_class]
np_data = df_filter.to_numpy()

In [None]:
# Show one randomly chosen misclassified image of the selected class.
if len(np_data) == 0:
    # np.random.randint(0) would raise ValueError; report the situation instead
    print(f"No misclassified samples for ground-truth class {gt_class}.")
else:
    index = np.random.randint(len(np_data))
    data = np_data[index]
    # Row layout: (path, pred, ground_truth)
    path, pred, gt = data[0], data[1], data[2]

    print(f"Ground truth: {orig_class_names[gt]} - Predicted: {orig_class_names[pred]}")
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not by raising
        raise FileNotFoundError(f"Could not read image: {path}")
    # OpenCV decodes to BGR; reverse the channel axis to get RGB for matplotlib
    plt.imshow(img[:, :, ::-1])
    plt.show()