<a href="https://colab.research.google.com/github/cosminnedescu/ProjectMLDL/blob/main/classifiers/iCaRL-v1_3_0-SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Avoid K80
!nvidia-smi

Sat Jul  3 09:05:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
import random

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.autograd import Variable

import torchvision
from torchvision import transforms, datasets, models
from torch.utils.data import Subset, DataLoader

from PIL import Image

import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
from copy import copy
from copy import deepcopy

#### Cloning the Git repository

In [3]:
!rm -rf ProjectMLDL
if not os.path.isdir('/content/ProjectMLDL'):
  !git clone https://github.com/cosminnedescu/ProjectMLDL.git
  %cd /content/ProjectMLDL
  !rm -rf LICENSE README.md

Cloning into 'ProjectMLDL'...
remote: Enumerating objects: 1838, done.[K
remote: Counting objects: 100% (324/324), done.[K
remote: Compressing objects: 100% (251/251), done.[K
remote: Total 1838 (delta 240), reused 73 (delta 73), pack-reused 1514[K
Receiving objects: 100% (1838/1838), 69.81 MiB | 30.02 MiB/s, done.
Resolving deltas: 100% (986/986), done.
/content/ProjectMLDL


In [4]:
from data.cifar100 import CIFAR100
from model.resnet32 import resnet32
import data.utils
from model.icarl import iCaRL

In [5]:
# True mean and std of Cifar100 dataset (src="https://gist.github.com/weiaicunzai/e623931921efefd4c331622c344d8151")
mean = [0.5071, 0.4867, 0.4408]
std = [0.2675, 0.2565, 0.2761]

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
test_transform = transforms.Compose(
    [transforms.ToTensor(), 
     transforms.Normalize(mean, std),
     ])

## Incremental Classifier and Representation Learning

### Defining hyperparameters according to iCarl paper

In [6]:
# Settings
DEVICE = 'cuda'
NUM_CLASSES = 100         # Total number of classes
VAL_SIZE = 0.2            # Proportion of validation set with respect to training set (between 0 and 1)

# Training
BATCH_SIZE = 128          # Batch size
LR = 2                    # Initial learning rate
                       
MOMENTUM = 0.9            # Momentum for stochastic gradient descent (SGD)
WEIGHT_DECAY = 1e-5       # Weight decay from iCaRL

RANDOM_SEED = [42]  # Random seeds defining the runs of every method
                          # Note: this should be at least 3 to have a fair benchmark

NUM_EPOCHS = 7           # Total number of training epochs
MILESTONES = [49, 63]     # Step down policy from iCaRL (MultiStepLR)
                          # Decrease the learning rate by gamma at each milestone
GAMMA = 0.2               # Gamma factor from iCaRL (1/5)

HERDING = False           # True to perform prioritized selection, False to perform random selection
CLASSIFY = True           # True to use mean-of-exemplar classifier, False to use network's output directly for classification

In [7]:
from data.exemplar import Exemplar
from torch.utils.data import DataLoader, ConcatDataset
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection  import ParameterGrid
from torch.backends import cudnn

In [15]:
class SVM_Classifier(iCaRL):
  
  def __init__(self, device, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, train_dl, validation_dl, test_dl, BATCH_SIZE, train_subset, train_transform, test_transform, params):
    super().__init__(device, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, train_dl, validation_dl, test_dl, BATCH_SIZE, train_subset, train_transform, test_transform)
    #self.validation_set = val_set
    #self.test_set = test_set
    self.PARAMS = params

  def separate_data(self, data):
    """
    X = np.zeros((len(data), 3, 32, 32))
    y = np.zeros(len(data), dtype=int)
    
    dataloader = DataLoader(data, batch_size=1)
    for i, (_, images, labels) in enumerate(dataloader):
      X[i] = images[0].numpy()
      y[i] = labels.numpy()[0]
    X_features = self.features_extractor(torch.tensor(X, dtype=torch.float))
    for i in range(X_features.size(0)):
      X_features[i] = X_features[i]/X_features[i].norm()
    X_features = X_features.to('cpu').numpy()
    return X_features, y
    """
    all_features = torch.tensor([])
    all_features = all_features.type(torch.LongTensor)
    all_targets = torch.tensor([])
    all_targets = all_targets.type(torch.LongTensor)
    for _, images, labels in data:
      images = images.to(self.DEVICE)
      labels = labels.to(self.DEVICE)
      
      all_targets = torch.cat((all_targets.to(self.DEVICE), labels.to(self.DEVICE)), dim=0)
      feature_map = self.features_extractor(images)
      for i in range(feature_map.size(0)):
        feature_map[i] = feature_map[i] / feature_map[i].norm()
      feature_map = feature_map.to(self.DEVICE)
      all_features = torch.cat((all_features.to(self.DEVICE), feature_map.to(self.DEVICE)), dim=0)
    return all_features.detach().cpu(), all_targets.detach().cpu()
    
    
  def fit_train_data(self, classes_group_idx):
    
    #exemplars = Exemplar(self.exemplar_set, self.train_transform)
    #train_set.dataset.set_transform_status(True)
    #ex_train_set = ConcatDataset([exemplars, train_set])
    X_train, y_train = self.separate_data(self.train_dl[classes_group_idx])
    #val_set.dataset.set_transform_status(True)
    X_test, y_test = self.separate_data(self.validation_dl[classes_group_idx])
    
    self.clf = SVC()   
    best_clf = None
    best_grid = None
    best_score = 0
    
    for grid in ParameterGrid(self.PARAMS):
        self.clf.set_params(**grid)
        self.clf.fit(X_train, y_train)
        y_pred = self.clf.predict(X_test)
        score = accuracy_score(y_test, y_pred)

        if score > best_score:
            best_clf = deepcopy(self.clf)
            best_score = score
            best_grid = grid
    else:
      self.clf = best_clf

    print(f"Best classifier: {best_grid} with score {best_score}")
  
  def predict_test_data(self, classes_group_idx):
    X_test, y_test = self.separate_data(self.test_dl[classes_group_idx])
    y_pred = self.clf.predict(X_test)
    return y_test, y_pred
  
  def test_classify(self, classes_group_idx, train_set):
    self.best_net.train(False)
    if self.best_net is not None: self.best_net.train(False)
    if self.old_net is not None: self.old_net.train(False)

    all_preds = torch.tensor([])
    all_preds = all_preds.type(torch.LongTensor)
    all_targets = torch.tensor([])
    all_targets = all_targets.type(torch.LongTensor)
    
    with torch.no_grad():
      self.fit_train_data(classes_group_idx)
      labels, preds = self.predict_test_data(classes_group_idx)
      accuracy = accuracy_score(labels, preds)

      labels = torch.tensor(labels)
      preds = torch.tensor(preds)
      all_targets = torch.cat((all_targets.to(self.DEVICE), labels.to(self.DEVICE)), dim=0)
      all_preds = torch.cat((all_preds.to(self.DEVICE), preds.to(self.DEVICE)), dim=0) 

    return accuracy, all_targets, all_preds

In [9]:
def do_group_classes(run):

  train_subset = [[] for i in range(10)]
  val_subset = [[] for i in range(10)]
  test_set = [[] for i in range(10)]
  train_dataloader = [[] for i in range(10)]
  val_dataloader = [[] for i in range(10)]
  test_dataloader = [[] for i in range(10)]

  for i in range(10):
    train_data = CIFAR100("dataset", 
                          train=True, 
                          transform=train_transform, 
                          download=(run+i==0),
                          random_state=RANDOM_SEED[run])
    test_data = CIFAR100("dataset", 
                         train=False, 
                         transform=test_transform, 
                         download=False,
                         random_state=RANDOM_SEED[run])
    
    train_data.set_index_map(train_data.splits[i])
    test_data.set_index_map([test_data.splits[j] for j in range(0, i+1)])
    
    train_indices, val_indices = train_data.train_val_split(VAL_SIZE, RANDOM_SEED[run])
    
    train_subset[i] = copy(Subset(train_data, train_indices))
    val_subset[i] = Subset(train_data, val_indices)
    test_set[i] = copy(test_data)

    tmp_dl = DataLoader(val_subset[i],
                       batch_size=BATCH_SIZE,
                       shuffle=True, 
                       num_workers=4,
                       drop_last=True)
    val_dataloader[i] = copy(tmp_dl)

    tmp_dl = DataLoader(test_data,
                       batch_size=BATCH_SIZE,
                       shuffle=True, 
                       num_workers=4,
                       drop_last=True)
    test_dataloader[i] = copy(tmp_dl)

  return train_dataloader, val_dataloader, test_dataloader, train_subset, val_subset, test_set

### Going on with the model
This is the main iCaRL step.

This step is run 3 times with different `RANDOM_SEED`.
Here the model is instantiated, trained and tested.

Results and some statistics are then stored in the variable `logs`.

In [10]:
params = {'C': [.1, 1, 10, 100]}

In [None]:
logs = [[] for i in range(len(RANDOM_SEED))]
best_net_tot_classes = [None for i in range(len(RANDOM_SEED))]

for run in range(len(RANDOM_SEED)):
  print("#################################")
  print(f"Radom seed: {RANDOM_SEED[run]}")
  print("")

  # get data_subsets separated in incremental groups of 10 classes
  train_dl, val_dl, test_dl, train_set, val_set, test_set = do_group_classes(run)

  #create the resnet
  net = resnet32()
  
  trainer = SVM_Classifier(DEVICE,
                  net,
                  LR,
                  MOMENTUM,
                  WEIGHT_DECAY,
                  MILESTONES,
                  GAMMA,
                  train_dl,
                  val_dl,
                  test_dl,
                  BATCH_SIZE,
                  train_set,
                  train_transform,
                  test_transform,
                  params)

  #train and evaluate the model
  logs[run] = trainer.train_model(NUM_EPOCHS, HERDING, CLASSIFY)

  best_net_tot_classes[run] = deepcopy(trainer.best_net)

  print("#################################")
  print("")
  print("")

#################################
Radom seed: 42

Files already downloaded and verified


  cpuset_checked))


Length of exemplars set: 0
Epoch 1/7 LR: [2]
Validation accuracy on group 1/10: 0.10
Best model updated

Epoch 2/7 LR: [2]
Validation accuracy on group 1/10: 0.10
Best model updated

Epoch 3/7 LR: [2]
Validation accuracy on group 1/10: 0.09

Epoch 4/7 LR: [2]
Validation accuracy on group 1/10: 0.10
Best model updated

Epoch 5/7 LR: [2]
Validation accuracy on group 1/10: 0.09

Epoch 6/7 LR: [2]
Validation accuracy on group 1/10: 0.11
Best model updated

Epoch 7/7 LR: [2]
Validation accuracy on group 1/10: 0.11

Group 1 Finished!
Best accuracy found at epoch 6: 0.11
Target number of exemplars: 200
Randomly extracting exemplars from class 0 of current split... Extracted 200 exemplars.
Randomly extracting exemplars from class 1 of current split... Extracted 200 exemplars.
Randomly extracting exemplars from class 2 of current split... Extracted 200 exemplars.
Randomly extracting exemplars from class 3 of current split... Extracted 200 exemplars.
Randomly extracting exemplars from class 4 of

  cpuset_checked))


Testing classes seen so far, accuracy: 0.10


Length of exemplars set: 2000


  cpuset_checked))


Epoch 1/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Epoch 2/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Epoch 3/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Epoch 4/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Epoch 5/7 LR: [2]
Validation accuracy on group 2/10: 0.06
Best model updated

Epoch 6/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Epoch 7/7 LR: [2]
Validation accuracy on group 2/10: 0.00

Group 2 Finished!
Best accuracy found at epoch 5: 0.06
Target number of exemplars: 100
Randomly extracting exemplars from class 0 of current split... Extracted 100 exemplars.
Randomly extracting exemplars from class 1 of current split... Extracted 100 exemplars.
Randomly extracting exemplars from class 2 of current split... Extracted 100 exemplars.
Randomly extracting exemplars from class 3 of current split... Extracted 100 exemplars.
Randomly extracting exemplars from class 4 of current split... Extracted 100 exemplars.
Randomly extracting exemplars from class 

  cpuset_checked))


Testing classes seen so far, accuracy: 0.05


Length of exemplars set: 2000


  cpuset_checked))


Epoch 1/7 LR: [2]
Validation accuracy on group 3/10: 0.01
Best model updated

Epoch 2/7 LR: [2]
Validation accuracy on group 3/10: 0.00

Epoch 3/7 LR: [2]
Validation accuracy on group 3/10: 0.01
Best model updated

Epoch 4/7 LR: [2]
Validation accuracy on group 3/10: 0.01
Best model updated

Epoch 5/7 LR: [2]
Validation accuracy on group 3/10: 0.02
Best model updated

Epoch 6/7 LR: [2]
Validation accuracy on group 3/10: 0.04
Best model updated

Epoch 7/7 LR: [2]
Validation accuracy on group 3/10: 0.04
Best model updated

Group 3 Finished!
Best accuracy found at epoch 7: 0.04
Target number of exemplars: 66
Randomly extracting exemplars from class 0 of current split... Extracted 66 exemplars.
Randomly extracting exemplars from class 1 of current split... Extracted 66 exemplars.
Randomly extracting exemplars from class 2 of current split... Extracted 66 exemplars.
Randomly extracting exemplars from class 3 of current split... Extracted 66 exemplars.
Randomly extracting exemplars from clas

### Store logs in more usable dtype

In [None]:
train_loss = [[logs[run_i]['group_train_loss'][i] for i in range(10)] for run_i in range(len(RANDOM_SEED))]
train_accuracy = [[logs[run_i]['group_train_accuracies'][i] for i in range(10)] for run_i in range(len(RANDOM_SEED))]
val_loss = [[logs[run_i]['val_losses'][i] for i in range(10)] for run_i in range(len(RANDOM_SEED))]
val_accuracy = [[logs[run_i]['val_accuracies'][i] for i in range(10)] for run_i in range(len(RANDOM_SEED))]
test_accuracy = [[logs[run_i]['test_accuracies'][i] for i in range(10)] for run_i in range(len(RANDOM_SEED))]
predictions = [logs[run_i]['predictions'].cpu().data.numpy().tolist() for run_i in range(len(RANDOM_SEED))]
true_labels = [logs[run_i]['true_labels'].cpu().data.numpy().tolist() for run_i in range(len(RANDOM_SEED))]

### Save the model

#### Saving logs in JSON files

In [None]:
import json

with open('states/iCaRL_Classifier_Cosine_train_loss.json', 'w') as f:
  json.dump(train_loss, f)
f.close
with open('states/iCaRL_Classifier_Cosine_train_accuracy.json', 'w') as f:
  json.dump(train_accuracy, f)
f.close
with open('states/iCaRL_Classifier_Cosine_val_loss.json', 'w') as f:
  json.dump(val_loss, f)
f.close  
with open('states/iCaRL_Classifier_Cosine_val_accuracy.json', 'w') as f:
  json.dump(val_accuracy, f)
f.close
with open('states/iCaRL_Classifier_Cosine_test_accuracy.json', 'w') as f:
  json.dump(test_accuracy, f)
f.close
with open('states/iCaRL_Classifier_Cosine_predictions.json', 'w') as f:
  json.dump(predictions, f)
f.close
with open('states/iCaRL_Classifier_Cosine_true_labels.json', 'w') as f:
  json.dump(true_labels, f)
f.close

#### Saving best resnet on 100 classes for each seed

In [None]:
for i in range(len(RANDOM_SEED)):
  torch.save(best_net_tot_classes[i].state_dict(), "states/iCaRL_Classifier_Cosine_bestnet_seed[{}]".format(RANDOM_SEED[i]))

In [None]:
from google.colab import files
files.download('states/iCaRL_Classifier_Cosine_train_loss.json')
files.download('states/iCaRL_Classifier_Cosine_train_accuracy.json')
files.download('states/iCaRL_Classifier_Cosine_val_loss.json')
files.download('states/iCaRL_Classifier_Cosine_val_accuracy.json')
files.download('states/iCaRL_Classifier_Cosine_test_accuracy.json')
files.download('states/iCaRL_Classifier_Cosine_predictions.json')
files.download('states/iCaRL_Classifier_Cosine_true_labels.json')
files.download('states/iCaRL_Classifier_Cosine_bestnet_seed[42]')
files.download('states/iCaRL_Classifier_Cosine_bestnet_seed[13]')
files.download('states/iCaRL_Classifier_Cosine_bestnet_seed[10]')

### Print some graphs

In [None]:
from data.utils_plot import plot_train_val, plot_test_accuracies, plot_confusion_matrix

In [None]:
train_loss = np.array(train_loss)
train_accuracy = np.array(train_accuracy)
val_loss = np.array(val_loss)
val_accuracy = np.array(val_accuracy)
test_accuracy = np.array(test_accuracy)

train_loss_stats = np.array([train_loss.mean(0), train_loss.std(0)]).transpose()
train_accuracy_stats = np.array([train_accuracy.mean(0), train_accuracy.std(0)]).transpose()
val_loss_stats = np.array([val_loss.mean(0), val_loss.std(0)]).transpose()
val_accuracy_stats = np.array([val_accuracy.mean(0), val_accuracy.std(0)]).transpose()
test_accuracy_stats = np.array([test_accuracy.mean(0), test_accuracy.std(0)]).transpose()

#### Train validation loss

In [None]:
plot_train_val(train_loss_stats, val_loss_stats, loss = True)

#### Train validation accuracy

In [None]:
plot_train_val(train_accuracy_stats, val_accuracy_stats, loss = False)

#### Test accuracy

In [None]:
plot_test_accuracies(test_accuracy_stats)

#### Confusion Matrix

In [None]:
for run in range(len(RANDOM_SEED)):
  targets = np.array(true_labels[run])
  preds = np.array(predictions[run])

  plot_confusion_matrix(targets, preds, RANDOM_SEED[run], 'iCaRL_Cosine')