**Ablation Study**

Classifiers:
*   K-Nearest Neighbors (KNN)
*   Multi-Layer Perceptron (MLP)



**Install requirements**

In [None]:
#!pip3 install 'torch==1.3.1'
#!pip3 install 'torchvision==0.5.0'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'

**Import models and functions**

In [None]:
import os

# Fetch the project's helper packages (utils/ and models/) only when a
# ./models directory is not already present in the current working directory.
# NOTE(review): the existence check is relative ('./models') while the copy
# targets are absolute ('/content/'), so this presumably assumes the working
# directory is /content (e.g. Google Colab) — confirm before running elsewhere.
if not os.path.isdir('./models'):
  !git clone https://github.com/gabrieletiboni/Incremental-learning-on-image-recognition.git
  !cp -r "/content/Incremental-learning-on-image-recognition/CODE/utils" "/content/"
  !cp -r "/content/Incremental-learning-on-image-recognition/CODE/models" "/content/"

**Import libraries**

In [None]:
import sys

import torch
import torch.nn as nn
from torch.backends import cudnn

import datetime
from pytz import timezone
import os
import math
import time
import random


from torchvision.datasets import CIFAR100
from torchvision.transforms import transforms
import torchvision.utils
from torch.utils.data import Subset, DataLoader
import torch.optim as optim

from models.ResNet import resnet32
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
# Below a modified version that best represents the same ResNet32 used by iCaRL
# from models.ResNet_iCaRLVersion import resnet32

# from models.iCaRL import *
from models.iCaRL import *
from utils.utils import *
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt

**GLOBAL PARAMETERS**

In [None]:
# Configuration cell: every tunable used by the cells below lives here.
DEVICE = 'cuda'
NUM_CLASSES = 100
DATA_DIR = './CIFAR_100'
# Run artifacts are written inside the cloned repository folder.
RUNS_DIR = '/content/Incremental-learning-on-image-recognition/RUNS'

# --- CUSTOM PARAMETERS
RANDOM_STATE = 2000          # int or None (Tarantino: 'tarantino', iCaRL: 1993, Telegram: 'telegram')

N_GROUPS_FOR_TRAINING = 10   # Number of class groups to use during training (1: use only the first group, 10: use all class groups)

USE_HERDING = False          # Forwarded as herding= to icarl.construct_exemplars

GITHUB_USER = 0             # 0: Roberto, 1: Alessandro, 2: Gabriele

CIFAR_NORMALIZE = False     # If True normalizes tensor with mean and standard deviation of CIFAR 100

NEIGHBORS = 15              # Number of neighbors considered for KNN

X1 = 64                     # Neurons at hidden layer 1
X2 = 256                    # Neurons at hidden layer 2

METHOD = 'iCARL_KNN'        # 'iCARL_KNN', 'iCARL_MLP'

# ---- MLP parameters (forwarded to icarl.eval_model_MLP when METHOD == 'iCARL_MLP')

MLP_BATCH_SIZE = 100
MLP_LR = 1e-2
MLP_MOMENTUM = 0.9
MLP_WEIGHT_DECAY = 1e-5
MLP_EPOCHS = 50
MLP_STEP_SIZE = 20
MLP_GAMMA=0.1

# ----------------------

DATA_AUGMENTATION = True
USE_VALIDATION_SET = False   # NOTE(review): no validation set is built in the visible cells; keep False
SHUFFLE_CLASSES = True
DUMP_FINAL_RESULTS_ON_GSPREADSHEET = True
COMMIT_ON_GITHUB = True      # NOTE(review): not read by any cell visible in this notebook
EVAL_AFTER_EACH_EPOCH = False # NOTE(review): not read by any cell visible in this notebook
BCE_VAR = 2          # 1: only the current classes for the one-hot (loss divided by 128x10, then 128x20, etc.)
                     # 2: use 100 classes from the start in the loss computation (loss always divided by 128x100)
                     # 3: use the current classes for the one-hot but divide the loss by 128x100
# ----------------------------------

# --- HYPERPARAMETERS
K = 2000                    # Passed as K= to the iCaRL constructor
BATCH_SIZE = 128
LR = 0.01                   # iCaRL uses LR=2 only because it uses the BCE; in general use 0.2
MOMENTUM = 0.9              # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 1e-5         # Regularization

NUM_EPOCHS = 70             # Total number of training epochs (iterations over dataset)
DO_MULTILR_STEP_DOWN = True # step down at 7/10 and 9/10 (only reported in the hyperparameter string; the training cell always uses MultiStepLR)
STEP_SIZE = 10              # NOTE(review): not read by the training cell in this notebook
GAMMA = 0.2                 # Multiplicative factor for learning rate step-down
# ---------------------
# ---------------------

**Define Data Processing**

In [None]:
# Per-channel statistics handed to transforms.Normalize.
if CIFAR_NORMALIZE:
  # Mean and standard deviation of CIFAR 100
  MEANS = (0.5071, 0.4865, 0.4409)
  STDS = (0.2673, 0.2564, 0.2762)
else:
  # Plain 0.5 / 0.5 normalization
  MEANS = (0.5, 0.5, 0.5)
  STDS = (0.5, 0.5, 0.5)

# Random augmentation is applied to training images only, and only on request.
augmentation_steps = [
  transforms.RandomCrop(32, padding=4),
  transforms.RandomHorizontalFlip(p=0.5),
] if DATA_AUGMENTATION else []

# Training pipeline: optional augmentation, then tensor conversion and normalization.
train_transform = transforms.Compose(augmentation_steps + [
  transforms.ToTensor(),                      # Turn PIL Image to torch.Tensor
  transforms.Normalize(mean=MEANS, std=STDS), # Normalize with the statistics selected above
])

# Evaluation pipeline: deterministic, no augmentation.
test_transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize(mean=MEANS, std=STDS),
])

**Import dataset CIFAR-100**

In [None]:
# For any information about CIFAR-100 follow the link below
# https://www.cs.toronto.edu/~kriz/cifar.html

train_dataset = CIFAR100(DATA_DIR, train=True, download=True, transform=train_transform)
test_dataset = CIFAR100(DATA_DIR, train=False, download=False, transform=test_transform)

if SHUFFLE_CLASSES:
  # --- Shuffle class ordering
  # classes_indexes[i] is the ORIGINAL CIFAR-100 label that becomes NEW label i.
  if RANDOM_STATE == 'telegram':
    # Fixed, hand-specified class order
    classes_indexes = np.array([30,  4, 36, 47, 81, 65, 66, 64, 68, 23, 72, 48, 54, 73,  6, 50, 51,
                          83, 75, 88, 58, 62, 39, 60, 94, 25, 84, 37, 33, 76, 34, 57, 46,  3,
                          24, 67, 17, 79, 40, 77, 26, 27, 41, 90, 89, 59, 20, 11, 61, 13, 44,
                          56,  9, 96, 70, 99, 82, 78,  5, 53, 16, 29,  0, 31,  7, 74, 55, 19,
                          42,  1, 92, 63, 52, 69, 22, 18, 28, 35,  8, 91, 86, 32, 97, 98, 15,
                            2, 45, 49, 95, 71, 14, 87, 80, 21, 38, 93, 43, 10, 12, 85])
    
  elif RANDOM_STATE == 'tarantino':
    # Draw 10 groups of 10 classes without replacement using Python's RNG
    random.seed(653)
    classes_indexes = [i for i in range(NUM_CLASSES)]

    classes_indexes_cum = []
    remaining = [i for i in range(NUM_CLASSES)]
    for i in range(10):
      classes_indexes_cum += random.sample(remaining, 10)
      remaining = list(set(classes_indexes)-set(classes_indexes_cum))

    classes_indexes = classes_indexes_cum
    classes_indexes = np.array(classes_indexes)

    print('Tarantino classes order:', classes_indexes)

  else:
    # Seeded (or unseeded when RANDOM_STATE is None) numpy permutation
    if RANDOM_STATE is not None:
      np.random.seed(RANDOM_STATE)

    classes_indexes = np.array([i for i in range(NUM_CLASSES)])
    np.random.shuffle(classes_indexes)


  # original label -> new label (the position of the original label in classes_indexes)
  classes_shuffle_dict = {ind:i for i, ind in enumerate(classes_indexes)}

  train_dataset.targets = [classes_shuffle_dict[tar] for tar in train_dataset.targets]
  test_dataset.targets = [classes_shuffle_dict[tar] for tar in test_dataset.targets]

  CLASSES = train_dataset.classes
  # Class name -> NEW label, built from the very same mapping applied to the
  # targets above. The previous code paired CLASSES[i] with classes_indexes[i],
  # which is the inverse of that mapping and therefore disagreed with the
  # remapped targets (unless the permutation happened to be its own inverse).
  train_dataset.class_to_idx = {name: classes_shuffle_dict[original] for original, name in enumerate(CLASSES)}
  # The test targets were remapped too, so keep the test metadata consistent.
  test_dataset.class_to_idx = dict(train_dataset.class_to_idx)
  LABEL_INDEX_DICT = train_dataset.class_to_idx
else:
  CLASSES = train_dataset.classes
  LABEL_INDEX_DICT = train_dataset.class_to_idx

# show_random_images(train_dataset, 5, mean=MEANS, std=STDS)

print('Train Dataset length:', len(train_dataset))
print('Test Dataset length:', len(test_dataset))

**3-Layer MLP**

In [None]:
### 3-layers MLP

import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch

class Perceptron(nn.Module):
    """Fully connected classifier: two ReLU hidden layers plus a linear output layer.

    Args:
        num_classes: Number of output logits.
        in_features: Size of each flattened input sample. Defaults to 64, the
            value that used to be hard-coded (presumably the feature size of
            the backbone — TODO confirm).
        hidden_sizes: Optional ``(hidden_1, hidden_2)`` pair with the widths of
            the two hidden layers. When ``None`` the module-level constants
            ``X1`` and ``X2`` are used, as before.
    """

    def __init__(self, num_classes=100, in_features=64, hidden_sizes=None):
        super(Perceptron, self).__init__()
        if hidden_sizes is None:
            # Backward-compatible default: widths come from the notebook globals.
            hidden_sizes = (X1, X2)
        hidden_1, hidden_2 = hidden_sizes
        self.layers = nn.Sequential(
            # hidden layer 1
            nn.Linear(in_features, hidden_1),
            nn.ReLU(),
            # hidden layer 2
            nn.Linear(hidden_1, hidden_2),
            nn.ReLU(),
            # output layer (raw logits, no activation)
            nn.Linear(hidden_2, num_classes)
        )

    def forward(self, x):
        """Flatten every sample to one vector and return the logits.

        Args:
            x: Tensor whose first dimension is the batch; the remaining
                dimensions must multiply to ``in_features``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # reshape (unlike view) also accepts non-contiguous inputs
        x = x.reshape(x.size(0), -1)
        x = self.layers(x)
        return x

def MLP(**kwargs):
  """Factory: build a Perceptron, forwarding every keyword argument to its constructor."""
  return Perceptron(**kwargs)


**Prepare training**

In [None]:
# Backbone network and the iCaRL helper object used throughout the training cell.
net = resnet32(num_classes=NUM_CLASSES)
icarl = iCaRL(device=DEVICE, batch_size=BATCH_SIZE, K=K, dataset=train_dataset)

# Define loss function
# reduction='mean' averages over every element of the loss tensor
# (batch_size*num_classes), not just over batch_size.
# NOTE(review): an earlier comment here claimed reduction='sum' was crucial and
# tied the averaging to iCaRL's very high learning rate; the code uses 'mean' —
# confirm which reduction is intended.
criterion = nn.BCEWithLogitsLoss(reduction='mean')
# No evaluation criterion: eval_model is called with loss=False in the training cell.
criterion_eval = None

**Training**

In [None]:
# Let cuDNN benchmark the available convolution algorithms and pick the fastest.
# The flag has to be ASSIGNED: the previous bare `cudnn.benchmark` expression
# only read the attribute and had no effect.
# NOTE(review): benchmark mode may pick different algorithms across runs, which
# can affect bit-exact reproducibility.
cudnn.benchmark = True

# Cumulative index/label bookkeeping across groups of classes
val_indexes_cum = []
test_indexes_cum = []
current_classes_cum = []

# One entry per group of classes: the final value recorded for that group
group_losses_train = []
group_losses_eval = []
group_accuracies_train = []
group_accuracies_eval = []
group_accuracies_eval_curr = []
group_accuracies_eval_KNN = []
group_accuracies_eval_MLP = []

# Every run writes into its own timestamped folder (Europe/Rome local time)
now = datetime.datetime.now(timezone('Europe/Rome'))
CURRENT_RUN = 'RUN_' + now.strftime("%Y-%m-%d %H %M %S")
try:
  os.makedirs(RUNS_DIR+'/'+CURRENT_RUN)
except OSError:
  # Without an output folder nothing can be saved, so abort instead of training.
  print ("FATAL ERROR - Creation of the directory of the current run failed")
  sys.exit()

dump_hyperparameters(path=RUNS_DIR+'/'+CURRENT_RUN, lr=LR, weight_decay=WEIGHT_DECAY, num_epochs=NUM_EPOCHS, method=METHOD, batch_size=BATCH_SIZE)

# Wall-clock start, used to report the total duration after training
START_TIME = time.time()

# Incremental training: one iteration per group of 10 classes. Each iteration
# trains on the new classes plus the stored exemplars, refreshes the exemplar
# sets, then evaluates the network and the selected ablation classifier.
for group_number in range(N_GROUPS_FOR_TRAINING):

  # Labels of the current group: 10 consecutive labels, end exclusive
  starting_label = (group_number*10)
  ending_label = (group_number+1)*10
  current_classes = list(range(starting_label, ending_label))

  new_indexes = get_indexes_from_labels(train_dataset, current_classes)

  # np.random.shuffle(new_indexes)

  # Training data: new classes alone, and new classes together with the exemplars kept so far
  train_dataset_curr = Subset(train_dataset, new_indexes)
  exemplars = icarl.flattened_exemplars()
  train_dataset_cum_exemplars = Subset(train_dataset, exemplars+new_indexes)

  # Update training set
  train_dataloader = DataLoader(train_dataset_curr, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
  train_dataloader_cum_exemplars = DataLoader(train_dataset_cum_exemplars, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

  # Unshuffled loaders over the same subsets, without drop_last, for evaluation
  train_dataloader_for_evaluation = DataLoader(train_dataset_curr, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
  train_dataloader_cum_exemplars_for_evaluation = DataLoader(train_dataset_cum_exemplars, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  # Update test set: cumulative (all classes seen so far) and current group only
  new_test_indexes = get_indexes_from_labels(test_dataset, current_classes)
  test_dataset_cum = Subset(test_dataset, test_indexes_cum+new_test_indexes)
  test_dataset_curr = Subset(test_dataset, new_test_indexes)

  test_indexes_cum += new_test_indexes

  test_dataloader = DataLoader(test_dataset_cum, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
  test_dataloader_curr = DataLoader(test_dataset_curr, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  print('******************************')
  print(f'NEW GROUP OF CLASSES {(group_number+1)}°/{N_GROUPS_FOR_TRAINING}')
  print('Training set length:', len(train_dataset_curr))
  if USE_VALIDATION_SET:
    # NOTE(review): val_dataset_cum is never defined in this notebook, so this
    # line raises NameError whenever USE_VALIDATION_SET is True.
    print('Validation set length:', len(val_dataset_cum))
  print('Test set length:', len(test_dataset_cum))
  
  net = net.to(DEVICE)

  parameters_to_optimize = net.parameters()

  # A fresh optimizer and scheduler for every group: the LR restarts from LR
  # and is multiplied by GAMMA at 7/10 and 9/10 of the epochs.
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  milestone_1 = math.floor(NUM_EPOCHS/10*7)
  milestone_2 = math.floor(NUM_EPOCHS/10*9)

  scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[milestone_1, milestone_2], gamma=GAMMA)

  current_step = 0
  # Per-group histories (these are what gets plotted and dumped for this group)
  losses_train = []
  losses_eval = []
  accuracies_train = []
  accuracies_eval = []
  accuracies_eval_curr = []
  accuracies_eval_KNN = []
  accuracies_eval_MLP = []

  net_old = None
  if starting_label > 0:
    # Save the current network so that the old outputs can be computed
    net_old = deepcopy(net)

  net.train()
  for epoch in range(NUM_EPOCHS):
    # Read the LR actually in use from the optimizer. scheduler.get_lr() is not
    # a reliable way to query the current LR when called outside step().
    current_lrs = [param_group['lr'] for param_group in optimizer.param_groups]
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lrs))

    #
    # Update weights using iCaRL BCE and distillation loss on Dataset
    #
    loss = icarl.update_representation(net, net_old, train_dataloader_cum_exemplars, criterion, optimizer, current_classes, starting_label, ending_label, current_step, bce_var=BCE_VAR)

    current_step += 1
    scheduler.step()

    print('--- Epoch {}, Loss on train: {}'.format(epoch+1, loss.item()))
    losses_train.append(loss.item())


  # --- END OF TRAINING FOR THIS GROUP OF CLASSES
  print('Length on train dataset (exemplars included):', len(train_dataset_cum_exemplars))

  #
  # Compute means of each class using the entire current training set and the exemplars
  #
  icarl.compute_means(net, train_dataloader_cum_exemplars, ending_label)

  if starting_label > 0:
    #
    # Reduce number of exemplars for each class to 2000/ending_label
    #
    icarl.reduce_exemplars(starting_label, ending_label)
 
  #
  # Construct exemplars for future evaluation
  #
  icarl.construct_exemplars(net, starting_label, ending_label, herding=USE_HERDING)
  exemplars = icarl.flattened_exemplars()
  # The ablation classifier is fitted on the exemplars only
  if METHOD == 'iCARL_KNN':
    exemplars_KNN = Subset(train_dataset, exemplars)
    KNN_dataloader = DataLoader(exemplars_KNN, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=False)
  if METHOD == 'iCARL_MLP':
    exemplars_MLP = Subset(train_dataset, exemplars)
    MLP_dataloader = DataLoader(exemplars_MLP, batch_size=MLP_BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

  # Accuracy of the network itself on all classes seen so far
  with torch.no_grad():
    _, accuracy_test = eval_model(net, test_dataloader, criterion=criterion_eval,
                                          dataset_length=len(test_dataset_cum), use_bce_loss=None,
                                          ending_label=ending_label, loss=False, device=DEVICE, display=True, suffix=' (group)')
  # No evaluation loss is computed (loss=False); -1 is stored as a placeholder
  losses_eval.append(-1)
  accuracies_eval.append(accuracy_test)

  if METHOD == 'iCARL_KNN':
    #
    # Eval model using KNN on test set
    #
    with torch.no_grad():
      accuracy_eval_KNN = icarl.eval_model_KNN(net, KNN_dataloader, test_dataloader, dataset_length=len(test_dataset_cum), display=True, suffix=' (group)', k=NEIGHBORS)
    accuracies_eval_KNN.append(accuracy_eval_KNN)
  if METHOD == 'iCARL_MLP':
    #
    # Eval model using MLP on test set
    #
    # NOTE(review): eval_model_MLP receives optimizer settings (lr, momentum,
    # num_epochs, ...) and so presumably trains the MLP; it is called under
    # torch.no_grad(), which only works if it re-enables gradients internally — confirm.
    with torch.no_grad():
      accuracy_eval_MLP = icarl.eval_model_MLP(net, MLP_dataloader, test_dataloader, dataset_length=len(test_dataset_cum), display=True, suffix=' (group)', lr=MLP_LR, bs=MLP_BATCH_SIZE, momentum=MLP_MOMENTUM, wd=MLP_WEIGHT_DECAY, num_epochs=MLP_EPOCHS,step_size=MLP_STEP_SIZE, gamma=MLP_GAMMA, nc=ending_label)
    accuracies_eval_MLP.append(accuracy_eval_MLP)

  #
  # Accuracy on training
  #
  with torch.no_grad():
    accuracy_train = eval_model_accuracy(net, train_dataloader_for_evaluation, dataset_length=len(train_dataset_curr), starting_label=starting_label, ending_label=ending_label, device=DEVICE, display=True, suffix='train (group)')
  accuracies_train.append(accuracy_train)

  #
  # Compute accuracy on test for novel classes only
  #
  with torch.no_grad():
    accuracy_eval_curr_classes = eval_model_accuracy(net, test_dataloader_curr, dataset_length=len(test_dataset_curr), starting_label=starting_label, ending_label=ending_label, device=DEVICE, display=True, suffix='test novel classes (group)')
  accuracies_eval_curr.append(accuracy_eval_curr_classes)

  # Persist this group's curves and raw values inside the run folder
  path = RUNS_DIR+'/'+CURRENT_RUN    
  create_dir_for_current_group(group_number, path=path)
  
  draw_graphs(losses_train,
        losses_eval,
        accuracies_train,
        accuracies_eval,
        num_epochs=NUM_EPOCHS, use_validation=USE_VALIDATION_SET, print_img=False, save=True, path=path, group_number=group_number)
  
  dump_to_csv(losses_train,
        losses_eval,
        accuracies_train,
        accuracies_eval,
        group_number=group_number, path=path)

  # Keep only the last value of every per-group history for the final summary
  group_losses_train.append(losses_train[-1])
  group_losses_eval.append(losses_eval[-1])
  group_accuracies_train.append(accuracies_train[-1])
  group_accuracies_eval.append(accuracies_eval[-1])
  group_accuracies_eval_curr.append(accuracies_eval_curr[-1])
  if METHOD == 'iCARL_KNN':
    group_accuracies_eval_KNN.append(accuracies_eval_KNN[-1])
  if METHOD == 'iCARL_MLP':
    group_accuracies_eval_MLP.append(accuracies_eval_MLP[-1])

# END OF OVERALL TRAINING
run_path = RUNS_DIR+'/'+CURRENT_RUN

# Per ablation method: (per-group classifier accuracies, summary label, `params` text for the spreadsheet)
final_reports = {
  'iCARL_KNN': (group_accuracies_eval_KNN, 'Average incremental accuracy (KNN)', 'K = {}'.format(NEIGHBORS)),
  'iCARL_MLP': (group_accuracies_eval_MLP, 'Average incremental accuracy (MLP)', 'MLP'),
}
# None when METHOD is not one of the two ablation classifiers: nothing classifier-specific is reported then
selected_report = final_reports.get(METHOD)

if selected_report is not None:
  clf_accuracies, clf_summary_label, clf_params = selected_report

  # Save the final per-group values and the summary plots in the run folder
  dump_final_values_nme(group_losses_train, group_accuracies_train, clf_accuracies, group_accuracies_eval, group_accuracies_eval_curr, path=run_path)
  draw_final_graphs_nme(group_losses_train, clf_accuracies, group_accuracies_eval, use_validation=USE_VALIDATION_SET, print_img=True, save=True, path=run_path)

  print(clf_summary_label, np.mean(clf_accuracies))

# Accuracy of the network's own outputs, reported whatever the method
print('Average incremental accuracy (hybrid 1)', np.mean(group_accuracies_eval))

#
# Compute and display confusion matrix
#
#conf_mat = get_conf_matrix_nme(net, test_dataloader, icarl=icarl, ending_label=ending_label, device=DEVICE)
#display_conf_matrix(conf_mat, display=True, save=True, path=RUNS_DIR+'/'+CURRENT_RUN)

# Total wall-clock time in minutes, one decimal
DURATION = round((time.time()-START_TIME)/60, 1)
print(f"> In {(DURATION)} minutes")

# Link to this run's folder on GitHub; the run name contains spaces, which are URL-encoded
github_link = ('https://github.com/gabrieletiboni/Incremental-learning-on-image-recognition/tree/master/RUNS/'+str(CURRENT_RUN)).replace(" ", "%20")
hyperparameters_string = get_hyperparameter_string(lr=LR, weight_decay=WEIGHT_DECAY, num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, multilrstep=DO_MULTILR_STEP_DOWN, gamma=GAMMA)

if DUMP_FINAL_RESULTS_ON_GSPREADSHEET and selected_report is not None:
  dump_on_gspreadsheet_nme(CURRENT_RUN, GITHUB_USER, github_link, METHOD, RANDOM_STATE, USE_HERDING,
                          CIFAR_NORMALIZE, BCE_VAR, group_losses_train, group_accuracies_train, clf_accuracies,
                          group_accuracies_eval, group_accuracies_eval_curr, DURATION,
                          hyperparameters=hyperparameters_string, ablation='clf', params=clf_params)

beep()