## Abstract

## Introduction

## Implementation

### Utils & Libraries

In [1]:
%%capture
!pip install gdown

In [2]:
# Generic python libraries
import os
import math
import random
from PIL import Image
from random import sample

# PyTorch and numpy related
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split

# 'external' tool file created by us for plotting
if not os.path.exists('charts.py'):
  !wget https://raw.githubusercontent.com/davidrojas0791/RVCDL_T1/master/utils/charts.py

from charts import create_figure, plot_figure, plot_multiBar

In [3]:
# if not os.path.exists('./train_dataset.pt'):
#   !gdown --id 1LbfOVhiQCnOgghRfs5CDMMtZEmzjuq0E
# !gdown "https://drive.google.com/u/0/uc?export=download&confirm=gS3O&id=1xDNnF7wJKuZFPWJ1wV2iVIaDhGgAbjU2" 

In [4]:
def count_parameters(model):
  return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [5]:
def generate_comparison_charts(train_loss, val_loss, train_acc, val_acc):
  loss_chart_data = [
    {
      "y_data": train_loss,
      "label": "train loss",
      "color": "#0f0"
    },
    {
      "y_data": val_loss,
      "label": "validation loss",
      "color": "#00f"
    }
  ]

  fig = create_figure()
  plot_figure(fig, loss_chart_data, "Loss Comparison", "Epochs", "Loss value")
  fig.show()

  acc_chart_data = [
    {
      "y_data": train_acc,
      "label": "train acc",
      "color": "#0f0"
    },
    {
      "y_data": val_acc,
      "label": "validation acc",
      "color": "#00f"
    }
  ]

  fig = create_figure()
  plot_figure(fig, acc_chart_data, "Acc Comparison", "Epochs", "Acc value")
  fig.show()


In [6]:
# Method to apply specific initialization for weights
def weight_init(m):
  if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
    nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))
    nn.init.zeros_(m.bias)

### AlexNet

#### Model definition

In [7]:
class AlexNet(nn.Module):
  def __init__(self, number_of_classes):
    super(AlexNet, self).__init__()
    self.network_sequence = nn.Sequential(
      nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=0),
      nn.BatchNorm2d(96),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3,stride=2,padding=0),  
      
      nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=0),
      nn.BatchNorm2d(256),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3,stride=2,padding=0),

      nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=0),
      nn.BatchNorm2d(384),
      nn.ReLU(),

      nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=0),
      nn.BatchNorm2d(384),
      nn.ReLU(),

      nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=0),
      nn.BatchNorm2d(256),
      nn.ReLU(),

      nn.Flatten(),

      nn.Linear(in_features=4096,out_features=1024),
      nn.BatchNorm1d(1024),
      nn.ReLU(),

      nn.Linear(in_features=1024,out_features=1024),
      nn.BatchNorm1d(1024),
      nn.ReLU(),

      nn.Linear(in_features=1024,out_features=number_of_classes),

    )
  def forward(self, input):
    return self.network_sequence(input)

### ResNet 50

#### Model Definition

In [8]:
class Conv1x1(nn.Module):
  def __init__(self, **kwargs):
    super(Conv1x1, self).__init__()
    self.conv = nn.Conv2d(
      kernel_size=1,
      **kwargs
    )
  def forward(self, input):
    return self.conv(input)

In [9]:
class Conv3x3(nn.Module):
  def __init__(self, **kwargs):
    super(Conv3x3, self).__init__()
    self.conv = nn.Conv2d(
      kernel_size=3,
      **kwargs
    )
  def forward(self, input):
    return self.conv(input)

In [10]:
class ResidualBottleNeckBlock(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, expansion_factor=4, groups=1, increase_initial_stride=False):
    super(ResidualBottleNeckBlock, self).__init__()
    self.seq_1 = nn.Sequential(
      Conv1x1(
        in_channels=in_channels,
        out_channels=out_channels,
        stride=2 if increase_initial_stride else 1,
        bias=False
      ),
      nn.BatchNorm2d(out_channels),
      nn.ReLU(),
      Conv3x3(
        in_channels=out_channels,
        out_channels=out_channels,
        stride=1,
        padding=1,
        groups=groups,
        bias=False
      ),
      nn.BatchNorm2d(out_channels),
      nn.ReLU(),
      Conv1x1(
        in_channels=out_channels,
        out_channels=expansion_factor * out_channels,
        stride=1,
        bias=False
      ),
      nn.BatchNorm2d(expansion_factor * out_channels),
      nn.ReLU()
    )
    
    self.increase_initial_stride = increase_initial_stride
    self.in_ch = in_channels
    self.out_ch = expansion_factor * out_channels
    self.rel = nn.ReLU()
    self.batch = nn.BatchNorm2d(in_channels)

    self.conv_aux = Conv1x1(
      in_channels=in_channels,
      out_channels=expansion_factor * out_channels,
      stride=2 if increase_initial_stride else 1,
      bias=False
    )
  

  def forward(self, x):

    # Pre activation
    x = self.rel(self.batch(x))

    partial_res = self.seq_1(x)

    # Using additional 1x1 conv layer to be able to add the residual
    # In the specific needed cases
    if self.in_ch != self.out_ch or self.increase_initial_stride:
      x = self.conv_aux(x)

    # Applying residual connection
    partial_res = partial_res + x
    
    return partial_res

In [11]:
class ResNet50(nn.Module):
  def __init__(self, number_of_classes):
    super(ResNet50, self).__init__()
    self.network_sequence = nn.Sequential(
      nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3,bias=False),
      nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
      
      # RGroup 1
      ResidualBottleNeckBlock(64,64,3),
      ResidualBottleNeckBlock(256,64,3),
      ResidualBottleNeckBlock(256,64,3),

      # RGroup 2
      ResidualBottleNeckBlock(256,128,3,increase_initial_stride=True),
      ResidualBottleNeckBlock(512,128,3),
      ResidualBottleNeckBlock(512,128,3),
      ResidualBottleNeckBlock(512,128,3),

      # RGroup 3
      ResidualBottleNeckBlock(512,256,3,increase_initial_stride=True),
      ResidualBottleNeckBlock(1024,256,3),
      ResidualBottleNeckBlock(1024,256,3),
      ResidualBottleNeckBlock(1024,256,3),
      ResidualBottleNeckBlock(1024,256,3),
      ResidualBottleNeckBlock(1024,256,3),

      # RGroup 4
      ResidualBottleNeckBlock(1024,512,3,increase_initial_stride=True),
      ResidualBottleNeckBlock(2048,512,3),
      ResidualBottleNeckBlock(2048,512,3),

      # activation of the last block
      nn.BatchNorm2d(2048),
      nn.ReLU(),

      # Output block and classifier
      nn.AvgPool2d(kernel_size=7,stride=1),
      nn.Flatten(),
      nn.Linear(2048,number_of_classes)
    )
  def forward(self, x):
    return self.network_sequence(x)

### ResNext50

#### Model Definition

In [12]:
class ResNext50(nn.Module):
  def __init__(self, number_of_classes):
    super(ResNext50, self).__init__()
    self.network_sequence = nn.Sequential(
      nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3,bias=False),
      nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
      
      # RGroup 1
      ResidualBottleNeckBlock(64,128,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(256,128,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(256,128,3,expansion_factor=2,groups=32),

      # RGroup 2
      ResidualBottleNeckBlock(256,256,3,increase_initial_stride=True,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(512,256,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(512,256,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(512,256,3,expansion_factor=2,groups=32),

      # RGroup 3
      ResidualBottleNeckBlock(512,512,3,increase_initial_stride=True,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(1024,512,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(1024,512,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(1024,512,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(1024,512,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(1024,512,3,expansion_factor=2,groups=32),

      # RGroup 4
      ResidualBottleNeckBlock(1024,1024,3,increase_initial_stride=True,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(2048,1024,3,expansion_factor=2,groups=32),
      ResidualBottleNeckBlock(2048,1024,3,expansion_factor=2,groups=32),

      # activation of the last block
      nn.BatchNorm2d(2048),
      nn.ReLU(),

      # Output block and classifier
      nn.AvgPool2d(kernel_size=7,stride=1),
      nn.Flatten(),
      nn.Linear(2048,number_of_classes)
    )
  def forward(self, x):
    return self.network_sequence(x)

### Loading and Processing dataset

In [13]:
class ClothingDataset(Dataset):
  def __init__(self, txt_file, max_entries=1000):
    self.src_file = txt_file
    self.images_data = []
    self.images_classes = []
    
    self.preprocess_pipeline = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor()
    ])

    with open(f"./clothing-small/{txt_file}", "r") as img_file:
      lines = img_file.readlines()
      # Random sampling 10k of entries from the file if it's bigger than that
      for line in lines:
        img_path, img_class = line.split()
        img_path = f"./clothing-small/{img_path}"
        image = Image.open(img_path)
        image = self.preprocess_pipeline(image)
        self.images_data.append(image)
        self.images_classes.append(int(img_class))

  def __len__(self):
    return len(self.images_data)

  def __getitem__(self, idx):
    return self.images_data[idx], self.images_classes[idx]

In [14]:
if not os.path.exists('./clothing-small/test_sample.txt'):
  !gdown --id 1M7J6VENHBks1Up-n_fhR8wI9GyL4XxjT
  !unzip -q VR_T1_clothing_small.zip

In [15]:
dataset = ClothingDataset("train_sample.txt")

In [16]:
train_dataset_size = int(len(dataset) * 0.8)
val_dataset_size = len(dataset) - train_dataset_size
train_dataset, val_dataset = random_split(dataset, [train_dataset_size, val_dataset_size])

### Training and Test

In [35]:
def train(data, network, optimizer, criterion, device):
  network.train()
  acc_c = 0
  loss_c = 0
  for input, target in data:
    input, target = input.to(device).float(), target.to(device)
    output = network(input)
    current_loss = criterion(output,target)
    loss_c += current_loss.item()
    acc_c += torch.sum((torch.argmax(output, 1) == target)).item()
    current_loss.backward()
    optimizer.step()
  return loss_c , acc_c

def validation(data, network, criterion, device):
  network.eval()
  acc_c = 0
  loss_c = 0
  with torch.no_grad():
    for input, target in data:
      input, target = input.to(device).float(), target.to(device)
      output = network(input)
      current_loss = criterion(output,target)
      loss_c += current_loss.item()
      acc_c += torch.sum((torch.argmax(output, 1) == target)).item()
    return loss_c , acc_c

def test(data, network, criterion, device, number_of_classes):
  network.eval()
  class_acc = [0] * number_of_classes
  class_miss = [0] * number_of_classes
  loss_c = 0
  with torch.no_grad():
    for input, target in data:
      input, target = input.to(device).float(), target.to(device)
      output = network(input)
      current_loss = criterion(output,target)
      loss_c += current_loss.item()
      for item1, item2 in list(zip(torch.argmax(output,1), target)):
        if (item1.item() == item2.item()):
          class_acc[item2.item()] += 1
        else:
          class_miss[item2.item()] += 1

    return loss_c, [ float(class_acc[i])/float(class_miss[i] + class_acc[i]) for i in range(number_of_classes) ], sum(class_acc)

#run epochs -> 
def run_epochs(network, train_dataset, val_dataset, optimizer, criterion, epochs=20, batch_size=64, device='cuda', collate=None):
  network.to(device)
  train_data = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,collate_fn=collate)
  val_data = DataLoader(val_dataset,batch_size=batch_size,shuffle=True,collate_fn=collate)
  train_loss, train_acc = [], []
  val_loss, val_acc = [], []
  for e in range(1,epochs+1):
    temp_train_loss, temp_train_acc = train(train_data, network, optimizer, criterion, device)
    temp_val_loss, temp_val_acc = validation(val_data, network, criterion, device)

    train_loss.append(temp_train_loss/len(train_dataset)*batch_size)
    train_acc.append(temp_train_acc/len(train_dataset)*100)

    val_loss.append(temp_val_loss/len(val_dataset)*batch_size)
    val_acc.append(temp_val_acc/len(val_dataset)*100)

    print(f"Epoch: {e} \n Train | Acc: {train_acc[-1]:.3f}% | loss: {train_loss[-1]:.3f} \n Val   | Acc: {val_acc[-1]:.3f}% | loss: {val_loss[-1]:.3f}")
  generate_comparison_charts(train_loss,val_loss,train_acc,val_acc)
  

In [18]:
LR = 1e-5
criterion = nn.CrossEntropyLoss()

In [19]:
alex_model = AlexNet(19)
optimizer = optim.AdamW(alex_model.parameters(),lr=LR)
run_epochs(alex_model,train_dataset,val_dataset,optimizer,criterion)

Epoch: 1 
 Train | Acc: 18.928% | loss: 2.606 
 Val   | Acc: 22.186% | loss: 2.512
Epoch: 2 
 Train | Acc: 24.561% | loss: 2.381 
 Val   | Acc: 25.063% | loss: 2.350
Epoch: 3 
 Train | Acc: 27.603% | loss: 2.288 
 Val   | Acc: 27.205% | loss: 2.264
Epoch: 4 
 Train | Acc: 29.139% | loss: 2.214 
 Val   | Acc: 29.925% | loss: 2.199
Epoch: 5 
 Train | Acc: 31.738% | loss: 2.144 
 Val   | Acc: 30.238% | loss: 2.165
Epoch: 6 
 Train | Acc: 32.641% | loss: 2.097 
 Val   | Acc: 32.755% | loss: 2.133
Epoch: 7 
 Train | Acc: 34.455% | loss: 2.048 
 Val   | Acc: 32.442% | loss: 2.104
Epoch: 8 
 Train | Acc: 35.038% | loss: 2.025 
 Val   | Acc: 35.194% | loss: 2.042
Epoch: 9 
 Train | Acc: 37.078% | loss: 1.969 
 Val   | Acc: 34.459% | loss: 2.056
Epoch: 10 
 Train | Acc: 38.278% | loss: 1.929 
 Val   | Acc: 35.616% | loss: 1.998
Epoch: 11 
 Train | Acc: 39.318% | loss: 1.888 
 Val   | Acc: 36.867% | loss: 1.988
Epoch: 12 
 Train | Acc: 40.421% | loss: 1.844 
 Val   | Acc: 37.633% | loss: 1.972
E

In [20]:
resnet_model = ResNet50(19)
optimizer = optim.AdamW(resnet_model.parameters(),lr=LR)
run_epochs(resnet_model,train_dataset,val_dataset,optimizer,criterion)

Epoch: 1 
 Train | Acc: 9.875% | loss: 2.878 
 Val   | Acc: 8.599% | loss: 2.963
Epoch: 2 
 Train | Acc: 12.599% | loss: 2.737 
 Val   | Acc: 11.882% | loss: 2.709
Epoch: 3 
 Train | Acc: 14.675% | loss: 2.649 
 Val   | Acc: 15.979% | loss: 2.616
Epoch: 4 
 Train | Acc: 17.791% | loss: 2.544 
 Val   | Acc: 19.684% | loss: 2.491
Epoch: 5 
 Train | Acc: 21.512% | loss: 2.438 
 Val   | Acc: 23.108% | loss: 2.408
Epoch: 6 
 Train | Acc: 23.365% | loss: 2.357 
 Val   | Acc: 23.671% | loss: 2.332
Epoch: 7 
 Train | Acc: 26.164% | loss: 2.267 
 Val   | Acc: 27.079% | loss: 2.250
Epoch: 8 
 Train | Acc: 27.947% | loss: 2.193 
 Val   | Acc: 30.879% | loss: 2.179
Epoch: 9 
 Train | Acc: 30.863% | loss: 2.111 
 Val   | Acc: 31.316% | loss: 2.096
Epoch: 10 
 Train | Acc: 32.841% | loss: 2.038 
 Val   | Acc: 34.084% | loss: 2.030
Epoch: 11 
 Train | Acc: 34.862% | loss: 1.977 
 Val   | Acc: 35.866% | loss: 1.977
Epoch: 12 
 Train | Acc: 36.617% | loss: 1.916 
 Val   | Acc: 36.836% | loss: 1.951
Epo

In [21]:
resnext_model = ResNext50(19)
optimizer = optim.AdamW(resnext_model.parameters(),lr=LR)
run_epochs(resnext_model,train_dataset,val_dataset,optimizer,criterion)

Epoch: 1 
 Train | Acc: 9.675% | loss: 2.947 
 Val   | Acc: 12.383% | loss: 3.019
Epoch: 2 
 Train | Acc: 11.966% | loss: 2.857 
 Val   | Acc: 12.555% | loss: 2.904
Epoch: 3 
 Train | Acc: 14.683% | loss: 2.732 
 Val   | Acc: 16.948% | loss: 2.738
Epoch: 4 
 Train | Acc: 18.412% | loss: 2.632 
 Val   | Acc: 20.216% | loss: 2.617
Epoch: 5 
 Train | Acc: 20.593% | loss: 2.526 
 Val   | Acc: 23.014% | loss: 2.499
Epoch: 6 
 Train | Acc: 23.279% | loss: 2.421 
 Val   | Acc: 24.375% | loss: 2.470
Epoch: 7 
 Train | Acc: 25.894% | loss: 2.289 
 Val   | Acc: 26.876% | loss: 2.364
Epoch: 8 
 Train | Acc: 29.076% | loss: 2.173 
 Val   | Acc: 30.175% | loss: 2.260
Epoch: 9 
 Train | Acc: 32.466% | loss: 2.084 
 Val   | Acc: 31.520% | loss: 2.292
Epoch: 10 
 Train | Acc: 34.076% | loss: 2.024 
 Val   | Acc: 34.522% | loss: 2.218
Epoch: 11 
 Train | Acc: 36.414% | loss: 1.939 
 Val   | Acc: 36.257% | loss: 2.037
Epoch: 12 
 Train | Acc: 37.860% | loss: 1.884 
 Val   | Acc: 37.023% | loss: 1.939
Ep

In [None]:
# To be used only after training and validation process
test_dataset = ClothingDataset("test_sample.txt")

In [23]:
test_data = DataLoader(test_dataset,batch_size=64,shuffle=True)

In [41]:
# Testing AlexNet model
alex_test_loss, alex_test_acc_by_class, alex_total_acc = test(test_data, alex_model, criterion, "cuda", 19) 

In [51]:
print(f"{alex_total_acc/len(test_dataset)*100:.1f}%")

39.4%


In [40]:
# Testing ResNet50 model
resnet_test_loss, resnet_test_acc_by_class, resnet_total_acc = test(test_data, resnet_model, criterion, "cuda", 19) 

In [52]:
print(f"{resnet_total_acc/len(test_dataset)*100:.1f}%")

40.1%


In [46]:
# Testing ResNext50 model
resnext_test_loss, resnext_test_acc_by_class, resnext_total_acc = test(test_data, resnext_model, criterion, "cuda", 19) 

In [53]:
print(f"{resnext_total_acc/len(test_dataset)*100:.1f}%")

43.5%


In [27]:
bar_fig = create_figure()
data_x = ["shorts","jackets_parka","sweater","skirts","dress","blouse_shirts","tee","pants","shoes","belt","coat","scarf","bag","socks","jumper","blazer","hat","glasses","tights"]
labels = ["AlexNet", "ResNet50", "ResNext50"]
data_y = [alex_test_acc_by_class, resnet_test_acc_by_class, resnext_test_acc_by_class]
titles = {'title':'Acc by algorithm', 'x_title': 'clases', 'y_title': 'Acc %'}
plot_multiBar(bar_fig, labels,data_x,data_y,titles)
bar_fig.show()