<a href="https://colab.research.google.com/github/alessandronicolini/IncrementalLearning/blob/main/iCarl_nme_ce_mse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
!pip3 install 'import_ipynb'
!pip3 install 'tqdm'

!rm -r IncrementalLearning
# upload work files from your git hub repository
import sys

!git clone https://github.com/alessandronicolini/IncrementalLearning.git # clone proj repository
!rm -rf IncrementalLearning/README.md 
!rm -rf IncrementalLearning/baselines.ipynb

path = 'IncrementalLearning/'
if path not in sys.path:
    sys.path.append('IncrementalLearning/')

!pip3 install import_ipynb

Cloning into 'IncrementalLearning'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 499 (delta 12), reused 0 (delta 0), pack-reused 474[K
Receiving objects: 100% (499/499), 460.85 KiB | 5.49 MiB/s, done.
Resolving deltas: 100% (287/287), done.


In [20]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import math
from sklearn.preprocessing import normalize
import copy
import torchvision.datasets as dsets
import torchvision.models as models
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from torch.utils.data import Subset, DataLoader
import random
from sklearn.metrics import confusion_matrix as s_cm
import seaborn as sn
import pandas as pd

import import_ipynb
from IncrementalLearning.cifar100 import ilCIFAR100

from IncrementalLearning.resnet_cifar import resnet32
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [21]:
class icarl(nn.Module):
  def __init__(self, randomseed, n_classes=100, classificationLoss=nn.CrossEntropyLoss(), distillationLoss=nn.MSELoss()):
    super(icarl, self).__init__()

    self.classification_loss = classificationLoss
    self.distillation_loss = distillationLoss

    self.model = resnet32(num_classes=n_classes)
    self.feature_extractor = self.model.features
    self.lr = 0.1
    self.gamma = 0.2
    self.weight_decay =1e-5 
    self.milestones = [49,63]
    self.batch_size = 128
    self.numepochs = 70
    self.n_classes = 0
    self.n_known = 0
    self.feature_size=64
    self.momentum=0.9
    self.criterion=nn.BCEWithLogitsLoss()
    self.compute_means = True
    self.exemplar_means = None
    self.exemplar_sets = []
    self.exemplar_labels = []
    self.NUM_BATCHES=10
    self.randomseed=randomseed
    self.trainloader=None
    self.testloader=None
    self.CLASSES_PER_BATCH=10
    self.original_training_set = ilCIFAR100(self.CLASSES_PER_BATCH,self.randomseed)
    self.original_test_set = ilCIFAR100(self.CLASSES_PER_BATCH,self.randomseed, train=False)

    self.classes_seen=0
    self.diz = self.original_training_set.get_dict()

  def update_parameters(self):
    old_model = copy.deepcopy(self)
    old_model.eval()
    old_model.to('cuda')
    n_classes = self.classes_seen+self.CLASSES_PER_BATCH
    print(n_classes)
    optimizer = optim.SGD(self.model.parameters(), lr=self.lr, momentum=self.momentum, weight_decay=self.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.milestones, gamma=self.gamma)
    for epoch in tqdm(range(self.numepochs)):
        
      for _, inputs, labels in self.trainloader:
        inputs = inputs.cuda()
        labels = torch.tensor([self.diz[c.item()] for c in labels])
        labels = labels.cuda()
        optimizer.zero_grad()
        outputs=self.forward(inputs)
        
        if self.classes_seen:
          old_target = old_model.forward(inputs)
          old_target = torch.softmax(old_target,dim=1)
          softmax_outputs = torch.softmax(outputs,dim=1)

          dist_loss = self.distillation_loss(softmax_outputs[:,:self.classes_seen], old_target[:,:self.classes_seen])
          class_loss = self.classification_loss(outputs, labels)
          loss = class_loss+dist_loss
        else:
          # primo batch di 10 classi
          loss = self.classification_loss(outputs[:,:self.CLASSES_PER_BATCH], labels) 

        loss.backward()
        optimizer.step()
      
      scheduler.step()

  def classify_nme(self, input_batch):
    min_distances = float('inf')*torch.ones(len(input_batch)).cuda() # shape: batch_size --> 128
    y_pred = torch.zeros(len(input_batch), dtype=torch.int8).cuda() # shape: batch_size --> 128
    input_features = self.feature_extractor(input_batch) # shape: (batch_size, feature_size) --> (128, 64)

    for i in range(len(self.exemplar_sets)):
      ex_mean = self.exemplar_means[i,:]

      # compute distances between inputs features and exemplar set means
      pdist = nn.PairwiseDistance(p=2)
      distances = pdist(input_features, ex_mean) # shape: batch_size --> 128

      # update min distancies and predicted labels
      mask = distances < min_distances
      min_distances[mask] = distances[mask]
      y_pred[mask] = self.exemplar_labels[i]

    return y_pred
    

  def get_new_exemplars(self, batch, m):
    loader = torch.utils.data.DataLoader(batch, batch_size=self.batch_size, shuffle=False, num_workers=4)
    features = np.zeros((0,self.feature_size))
    indices = np.zeros((0), dtype=int)

    # disabilitando il calcolo del gradiente, per ogni batch di immagini calcola
    # le features (shape 128,64), normalizza lungo le righe, concatena gli elemnti

    with torch.no_grad():
      for indexes, images, labels in loader:
        images = images.cuda()
        feature = self.feature_extractor(images).data.cpu().numpy() # shape (128, 64)
        feature = normalize(feature, axis=1, norm='l2') # normalizza lungo le righe
        features = np.concatenate((features,feature), axis=0) 
        indices = np.concatenate((indices,indexes), axis=0) # shape 128

    # calcola il valore medio per ogni colonna (ogni feature) e poi normalizza
    # i vettore dei valori medi
    class_mean = np.mean(features, axis=0) # shape 64
    class_mean = class_mean / np.linalg.norm(class_mean) # shape 64

    exemplar_set = []
    exemplar_features = np.zeros((0,self.feature_size)) # shape _, 64

    for k in range(1, int(m)+1):
        S = np.sum(exemplar_features, axis=0)
        phi = features
        mu = class_mean
        mu_p = 1.0 / k * (phi + S)
        mu_p = normalize(mu_p, axis=1, norm='l2')
        i = np.argmin(np.sqrt(np.sum((mu - mu_p) ** 2, axis=1)))
        exemplar_set.append(indices[i])
        addfeature =  np.expand_dims(features[i], axis=0)
        exemplar_features = np.concatenate((exemplar_features,addfeature), axis=0)

        #remove duplicates
        features = np.delete(features, i, 0)
        indices = np.delete(indices, i, 0)
        
    self.exemplar_sets.append(exemplar_set)
        
  def reduce_old_exemplars(self,m):
    # selezioni i primi m elementi di ogni exemplar set, all'inizio 
    # self.exemplar_sets è vuoto e non viene effettuata alcuna operazione
    for y, P_y in enumerate(self.exemplar_sets):
            self.exemplar_sets[y] = P_y[:int(m)]

  def forward(self, x):
    self.model = self.model.cuda()

    return self.model.forward(x)

  def __accuracy_fc(self, dl, model, mapper):
    total = 0.0
    correct = 0.0
    for  _, images, labels in dl:
      labels = torch.tensor([mapper[c.item()] for c in labels])
      labels = labels.cuda()
      images = images.cuda()
      outputs = self.forward(images)
      _, preds = torch.max(outputs, dim=1)
      total += len(labels)
      correct += torch.sum(preds == labels).item()
    acc = correct / total

    return acc

  def __accuracy_nme(self, dl, model):
    total = 0.0
    correct = 0.0
    for  _, images, labels in dl:
      labels = labels.cuda()
      images = images.cuda()
      outputs = model(images)
      preds = self.classify_nme(images)
      total += len(labels)
      correct += torch.sum(preds == labels).item()
    acc = correct / total

    return acc

  def training_model(self):
    # lista di 10 elementi, ogni elemento è un array che contiene 5000 indici
    # delle immagini per fare il training. In ogni array ci sono indici
    # appartenenti a 10 classe diverse.
    train_indices = self.original_training_set.get_batch_indexes() 

    # stessa strutture dei train_indices ma il numero di indici per elemento è 
    # 1000
    test_indices = self.original_test_set.get_batch_indexes()

    # classi degli elementi che sono utilizzati and ogni passo
    batches = self.original_training_set.getbatches()

    current_test_indexes = []
    acc = []
    accuracy = 0
    for i in range(self.NUM_BATCHES):
      for exemplars in self.exemplar_sets:
        # aggiorna gli elementi di training del batch di 10 classi corrente  
        # aggiungendo gli exemplars trovati allo step precedente
        train_indices[i] = np.concatenate([train_indices[i], np.array(exemplars)])

      train_dataset = Subset(self.original_training_set, train_indices[i])
      # devi fare il test sugli elementi appartenenti alle 10 classi correnti ma
      # anche su tutti quelli utilizzati per il test nei passi precedenti
      current_test_indexes += test_indices[i].tolist()
      test_dataset = Subset(self.original_test_set, current_test_indexes)
      
      # elementi del test appartenenti solo al batch di 10 classi corrente
      actual_test_dataset = Subset(self.original_test_set,test_indices[i])
      
      # dataloaders
      self.trainloader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=4, drop_last=True)
      self.testloader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4, drop_last=True)        
      self.actualtestloader = DataLoader(actual_test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4, drop_last=True)
      
      self.train()
      self.update_parameters()    
      self.classes_seen += 10
      self.eval() # Set Network to evaluation mode

      # reduce exemplars
      m = int(2000/(int(i*10+10)))
      self.reduce_old_exemplars(m) 

      # add new exemplar sets
      for classlabel in batches[i]:
        
        # salva tutti gli indici degli elementi di classe classLabel
        indexes_class = self.original_training_set.get_class_indexes(classlabel)
        
        # crea un dataset con le immagini identificate da quegli indici
        current_class = Subset(self.original_training_set, indexes_class)
        self.get_new_exemplars(current_class, m)
      
      # compute means of exemplar set
      # cycle for each exemplar set
      self.exemplar_means = torch.zeros((0, self.feature_size), dtype=torch.float).cuda()
      self.exemplar_labels = []
      for i in range(len(self.exemplar_sets)):
        exemplars_dataset = Subset(self.original_training_set, self.exemplar_sets[i])
        exemplars_loader = torch.utils.data.DataLoader(exemplars_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4)
        ex_features = torch.zeros((0, self.feature_size), dtype=torch.float).cuda() # alla fine shape: (len(exemplar_set), feature_size) --> (m, 64)
      
        with torch.no_grad():
          _, _, exemplar_label = self.original_training_set.__getitem__(self.exemplar_sets[i][0]) 
          self.exemplar_labels.append(exemplar_label)
          
          # cycle for each batch in the current exemplar set
          for _,  exemplars, _ in exemplars_loader:
          
            # get exemplars features
            exemplars = exemplars.cuda()
            features = self.feature_extractor(exemplars) # shape: (len(exemplars), feature_size)
          
            # normalize 
            feature_norms = torch.norm(features, p=2, dim=1) # shape: len(exemplars)
            feature_norms.unsqueeze_(1) # shape: (len(exemplars), 1)
            features = features/feature_norms
          
            # concatenate over columns
            ex_features = torch.cat((ex_features, features), dim=0)
          
        # compute current exemplar set mean and normalize it
        ex_mean = torch.mean(ex_features, dim=0) # shape: feature_size --> 64
        ex_mean = ex_mean/torch.norm(ex_mean)
        ex_mean.unsqueeze_(0) # shape: (1, feature_size) --> (1, 64)
        self.exemplar_means = torch.cat((self.exemplar_means, ex_mean), dim=0) # shape: (n_examplar set, feature size)
      
      print('accuracy on training set: ', 100*self.__accuracy_fc(self.trainloader,self,self.diz))
      print('accuracy on test set: ', 100*self.__accuracy_nme(self.testloader, self))
      print('accuracy on actual test set: ', 100*self.__accuracy_nme(self.actualtestloader, self))
      print('-' * 80)
      acc.append(accuracy)

In [22]:
model = icarl(randomseed=7).cuda()
model.training_model()

Files already downloaded and verified
Files already downloaded and verified
10


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.39903846153845
accuracy on test set:  82.36607142857143
accuracy on actual test set:  82.36607142857143
--------------------------------------------------------------------------------
20


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.85532407407408
accuracy on test set:  71.71875
accuracy on actual test set:  74.33035714285714
--------------------------------------------------------------------------------
30


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.88425925925925
accuracy on test set:  64.50407608695652
accuracy on actual test set:  76.89732142857143
--------------------------------------------------------------------------------
40


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.92766203703704
accuracy on test set:  61.517137096774185
accuracy on actual test set:  75.78125
--------------------------------------------------------------------------------
50


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.92766203703704
accuracy on test set:  55.82932692307693
accuracy on actual test set:  71.42857142857143
--------------------------------------------------------------------------------
60


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.94212962962963
accuracy on test set:  53.88926630434783
accuracy on actual test set:  72.20982142857143
--------------------------------------------------------------------------------
70


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f936c129e48>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f936c129e48>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    sel


accuracy on training set:  99.91319444444444
accuracy on test set:  49.508101851851855
accuracy on actual test set:  68.19196428571429
--------------------------------------------------------------------------------
80


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.95659722222221
accuracy on test set:  46.585181451612904
accuracy on actual test set:  68.41517857142857
--------------------------------------------------------------------------------
90


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.92766203703704
accuracy on test set:  45.323660714285715
accuracy on actual test set:  66.96428571428571
--------------------------------------------------------------------------------
100


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))


accuracy on training set:  99.88425925925925
accuracy on test set:  43.52964743589743
accuracy on actual test set:  63.05803571428571
--------------------------------------------------------------------------------
