In [None]:
#@title Imports and seeds

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import sys
import os
import random
import numpy as np
import time
import math

# Single seed shared by every random number generator configured in this cell.
seed = 69420

# Reproducibility setup, following
# https://pytorch.org/docs/stable/notes/randomness.html

# cuDNN flags: deterministic mode on, benchmark mode off.
# Deterministic mode might cause runtime errors on certain operations.
cudnn.deterministic = True
cudnn.benchmark = False

# PyTorch generators (the CUDA one only when a GPU is visible).
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

# NumPy and Python built-in generators.
np.random.seed(seed)
random.seed(seed)

def seed_worker(worker_id):
    """Seed NumPy's and Python's global RNGs from torch's current initial seed.

    Intended to be passed as `worker_init_fn` to a DataLoader. `worker_id` is
    accepted to match that callback signature but is not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)  # keep only the low 32 bits
    random.seed(derived_seed)
    np.random.seed(derived_seed)


In [None]:
# Print the version of the `python` executable found on the shell PATH.
! python --version

In [None]:
%%capture
!pip install avalanche-lib==0.3.1

In [None]:
#@title Train and test loops


def train(network, optimizer, loss_fn, data_loader, device):
    """Run one training pass over `data_loader` and report averaged metrics.

    Args:
        network: model called on every batch; put into train mode here.
        optimizer: optimizer whose `step()` is called once per batch.
        loss_fn: callable `loss_fn(outputs, labels)` returning a scalar loss.
        data_loader: iterable yielding `(batch, labels, task_indices)` triples;
            the third element is ignored.
        device: device that each batch and its labels are moved to.

    Returns:
        `(mean_loss, accuracy)`: the per-batch losses averaged over the number
        of batches, and the fraction of samples whose highest output matches
        the label.

    Raises:
        ZeroDivisionError: if `data_loader` yields no batches.
    """
    network.train()
    network.zero_grad()

    loss_sum, batches_seen = 0.0, 0
    n_correct, n_samples = 0, 0

    for inputs, targets, _ in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        logits = network(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()

        optimizer.step()
        # Reset the network's gradients so they do not accumulate across batches.
        network.zero_grad()

        loss_sum += batch_loss.item()
        batches_seen += 1

        # max(1) returns (values, indices); the indices are the predicted classes.
        predicted = logits.max(1)[1]
        n_correct += (predicted == targets).sum().item()
        n_samples += targets.size(0)

    return loss_sum / batches_seen, n_correct / n_samples

def test(network, loss_fn, data_loader, device, return_confusion_matrix=False):
    test_loss = 0.0
    num_batches = 0
    total_predictions = 0
    correct_predictions = 0

    confusion_matrix = [[0 for _ in range(11)] for _ in range(11)]

    network.eval()
    with torch.no_grad():
        for batch, labels, task_indices in data_loader:
            batch, labels = batch.to(device), labels.to(device)
      
            outputs = network(batch)
            loss = loss_fn(outputs, labels)

            test_loss += loss.item()
            _, predicted = outputs.max(1)

            total_predictions += labels.size(0)
            correct_predictions += predicted.eq(labels).sum().item()
            num_batches += 1

            if return_confusion_matrix:
                for pred, label in zip(predicted, labels):
                    confusion_matrix[pred][label] += 1

    if return_confusion_matrix:
        return test_loss / num_batches , correct_predictions / total_predictions, confusion_matrix 
    
    return test_loss / num_batches , correct_predictions / total_predictions 



In [None]:
#@title Define the benchmark ~ dataloader 

from avalanche.benchmarks.classic.clear import CLEAR

# Normalize with the per-channel statistics from ImageNet.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, random 224 crop, tensor conversion, normalization.
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: identical, except that the crop is taken from the center.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

benchmark = CLEAR(
    data_name='clear10',
    # streaming = test on the next task | iid = test on a 30% test split from the current task
    evaluation_protocol='iid',
    # None -> return RGB images, not features extracted by pre-trained models
    feature_type=None,
    seed=0,
    train_transform=train_transform,
    eval_transform=test_transform,
    dataset_root='/kaggle/input/dl-dataset',
)


In [None]:
#@title metrics

# intended behavior:
# def my_metric(batch_logits, labels) -> tensor with the same len as the batch,
# containing the calculated metric for each sample in the batch, based on the logits and the gt labels

def margin(batch, labels):
    '''
    Per-sample margin, computed on softmax probabilities.

    The margin of a sample is the softmax probability of its correct class
    minus the largest softmax probability among the remaining classes.
    For misclassified samples the margin is negative.

    batch  -> raw network outputs, (N x num_classes)
    labels -> integer class indices, (N,)

    Returns a tensor of shape (N,) on the cpu. An empty batch yields an
    empty tensor.
    '''
    probs = nn.functional.softmax(batch, dim=1)
    label_idx = labels.unsqueeze(1)

    # squeeze(1) keeps the batch dimension even when N == 1
    label_probs = torch.gather(probs, 1, label_idx).squeeze(1)

    # Zero out the correct-class entry of every row in a single out-of-place
    # scatter, then take the row-wise max over what is left. Softmax outputs
    # are non-negative, so the zeroed entry only wins when all others are 0 too.
    other_probs = probs.scatter(1, label_idx, 0.0).max(dim=1).values

    margins = label_probs - other_probs
    return margins.to('cpu') # I don't need this metric to be on the gpu


def entropy(batch, labels):
    '''
    Return a tensor of the same shape as labels, containing the entropy
    (natural log) of the softmax distribution of each sample.

    batch  -> raw network outputs, (N x num_classes)
    labels -> unused; kept so every metric shares the same signature

    torch.special.entr computes -p * ln(p) element-wise and returns 0 for
    p == 0, so a probability that underflows to zero contributes 0 instead
    of turning the whole sum into NaN (0 * -inf).
    '''
    probs = nn.functional.softmax(batch, dim=1)
    per_sample_entropy = torch.special.entr(probs).sum(dim=1)

    return per_sample_entropy.to('cpu')


def el2n(batch, labels, num_classes=11):
    '''
    EL2N score per sample: the L2 norm of softmax(batch) - one_hot(labels).

    batch       -> raw network outputs, (N x num_classes)
    labels      -> integer class indices, (N,)
    num_classes -> width of the one-hot encoding

    Returns a tensor with one score per sample, moved to the cpu.
    '''
    targets = nn.functional.one_hot(labels, num_classes=num_classes)
    probs = nn.functional.softmax(batch, dim=1)

    residual = probs - targets
    return torch.linalg.vector_norm(residual, dim=1).to('cpu')

def random_sampling(batch, labels):
    '''
    Score every sample with an independent torch.rand draw, so that ranking
    by score picks samples at random.

    batch  -> unused; kept so every metric shares the same signature
    labels -> only its first dimension is read, to size the output
    '''
    num_samples = labels.size(0)
    return torch.rand(num_samples)
    



In [None]:
#@title get subset

class IdxDataset(torch.utils.data.Dataset):
    """Wrap a dataset so that every item also carries the index it was read at.

    The wrapped dataset must yield 3-element items. The third element (the
    task index for benchmark datasets) is dropped and replaced by the
    requested index.
    """

    def __init__(self, dataset_):
        self.ds = dataset_

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, index):
        # benchmark datasets return (data, target, task_index); drop the last one
        sample, label, _task_index = self.ds[index]
        return sample, label, index

def select_top_samples(net, device, dataset, metric, mode='max', fraction=0.01, offset=0,
                       batch_size=128, num_workers=2):
    '''
    Select the top `fraction` of samples from the dataset, ranked by `metric`.

    net         - model used to produce the outputs the metric is computed on
    device      - device the batches and labels are moved to
    dataset     - dataset yielding (data, target, task_index) items
    metric      - callable metric(outputs, labels) -> one score per sample
    mode        - 'max' if the samples with the highest values should be taken;
                  any other value takes the lowest values first
    fraction    - a number between 0.01 and 0.99 (0.1 would be a more practical upper bound)
    offset      - number of top samples to ignore - in case there are mislabeled datapoints
                - make sure offset + fraction * len(dataset) <= len(dataset);
                  otherwise fewer samples than requested are returned
    batch_size  - batch size of the scoring pass
    num_workers - DataLoader workers used for the scoring pass

    Returns a torch.utils.data.Subset of `dataset` holding the selected samples.
    '''
    descending = (mode == 'max')

    # Sentinel scores; every position is overwritten during the scoring pass.
    metric_scores = torch.full((len(dataset),), -128 if descending else 128, dtype=torch.float32)
    sel_loader = DataLoader(IdxDataset(dataset), batch_size=batch_size, shuffle=False,
                            num_workers=num_workers)

    # Score in eval mode so layers such as BatchNorm/Dropout neither perturb the
    # outputs nor update their running statistics; restore train mode afterwards
    # if that is how the model was handed in.
    was_training = getattr(net, 'training', False)
    if was_training:
        net.eval()
    try:
        with torch.no_grad():
            for batch, labels, idxs in sel_loader:
                batch, labels = batch.to(device), labels.to(device)
                output = net(batch)

                # idxs are the dataset positions of this batch (from IdxDataset)
                metric_scores[idxs] = metric(output, labels)
    finally:
        if was_training:
            net.train()

    sample_args = torch.argsort(metric_scores, descending=descending) # indices that sort the samples

    num_samples_to_take = int(fraction * len(dataset))
    # Subset expects a sequence of plain ints, so convert the index tensor.
    selected_samples = sample_args[offset: offset + num_samples_to_take].tolist()

    return torch.utils.data.Subset(dataset, selected_samples)


In [None]:
#@title Initialize model and optimizer
# --- Data loading ---
BATCH_SIZE, BATCH_SIZE_TEST = 64, 128

# --- Per-task training schedule and early stopping ---
NUM_MIN_EPOCHS_TASK, NUM_MAX_EPOCHS_TASK = 20, 40
PATIENCE = 5
loss_threshold = 0
optimizer_lr = 0.01

# --- Replay-subset selection ---
metric_used = random_sampling
mode_used = 'max'
fraction_used = 0.05
offset_used = 0

# Prefix of every file written by this run.
test_name = 'random_5_3'

# The run requires a GPU: stop right here when CUDA is missing.
target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
if target_device == 'cpu':
    sys.exit('CUDA not available.')

device = torch.device(target_device)

# ResNet-18 without pretrained weights, with an 11-way output layer.
model = torchvision.models.resnet18(weights=None, num_classes=11)
model.to(device)

optimizer = optim.SGD(
    model.parameters(),
    lr=optimizer_lr,
    momentum=0.9,
    weight_decay=1e-5,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=NUM_MAX_EPOCHS_TASK,
    verbose=False,
)
criterion = nn.CrossEntropyLoss()


In [None]:
#@title Iterate over tasks and save metrics
from tqdm import tqdm


# Continual-learning driver: for every (train, test) experience pair of the
# benchmark, train on the current experience plus the replay subsets kept from
# earlier experiences, restore the best checkpoint, evaluate on every test
# experience, then pick a replay subset from the current experience.

# Per-experience, per-epoch curves. Epochs skipped by early stopping stay 0.
# NOTE(review): the first dimension (10) is hard-coded — presumably the number
# of experiences in the stream; confirm against the benchmark.
train_accs = np.zeros((10, NUM_MAX_EPOCHS_TASK), dtype=np.float32)
train_losses = np.zeros((10, NUM_MAX_EPOCHS_TASK), dtype=np.float32)

test_accs = np.zeros((10, NUM_MAX_EPOCHS_TASK), dtype=np.float32)
test_losses = np.zeros((10, NUM_MAX_EPOCHS_TASK), dtype=np.float32)

# accuracy_matrix[exp, task] = accuracy (in percent) on test task {task} after training on experience {exp}
accuracy_matrix = np.zeros((10, 10), dtype=np.float32)
confusion_matrices = [] # confusion_matrices[exp, task] is the confusion matrix after experience {exp} on task {task} 
ratios_during_training = {} # ratios of neurons not activating during training
                            # (integer) exp: [ratios] # ratios saved every 5 epochs and once more after the last training epoch on each experience  
                            # NOTE(review): nothing in this cell ever writes to this dict.
# Replay memory: one selected subset per experience already trained on.
subsets_memory = []

for exp_index, (experience, test_experience) in tqdm(enumerate(zip(benchmark.train_stream, benchmark.test_stream))):
    # Fresh generator, re-seeded with the global seed for every experience;
    # it drives the shuffling of the training DataLoader.
    g = torch.Generator()
    g.manual_seed(seed)

    train_dataset = experience.dataset
    # concat the current experience with the saved subsets from previous experiences
    train_dataset = torch.utils.data.ConcatDataset([train_dataset, *subsets_memory]) 
    print("train_dataset length: ", len(train_dataset))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
        num_workers=2, worker_init_fn=seed_worker, generator=g)
    test_dataset = test_experience.dataset
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_TEST, shuffle=False, num_workers=2)
    
    # init for early stopping
    last_loss = float('inf') # reference test loss that later epochs must improve on
    anger = 0 # consecutive epochs without sufficient improvement
    best_test_acc = 0
    # ratios list during training on current experience
    # NOTE(review): never appended to or read in this cell.
    current_exp_ratios = []
    # init optimizer lr and the scheduler before every experience
    print('Optimizer lr:', optimizer.param_groups[0]['lr'])
    optimizer.param_groups[0]['lr'] = optimizer_lr
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_MAX_EPOCHS_TASK, verbose=False)
    for epoch in tqdm(range(1, NUM_MAX_EPOCHS_TASK+1)):
        # epoch is 1-based, the metric arrays are 0-based
        train_loss, train_acc = train(model, optimizer, criterion, train_loader, device)
        train_losses[exp_index, epoch-1] = train_loss
        train_accs[exp_index, epoch-1] = train_acc

        test_loss, test_acc = test(model, criterion, test_loader, device)
        test_losses[exp_index, epoch-1] = test_loss
        test_accs[exp_index, epoch-1] = test_acc

        scheduler.step()
        
        # save model with the best acc on validation/test
        # NOTE(review): checkpoint selection and early stopping both look at the
        # test loader of the current experience, not at a separate validation split.
        # NOTE(review): if test_acc never exceeds 0 no checkpoint is written and
        # the torch.load below will fail.
        if test_acc > best_test_acc:
            torch.save(model.state_dict(), f'./{test_name}_model_{exp_index}.pt')
            best_test_acc = test_acc
        
        # early stopping criterion - test loss didn't improve by at least {loss_threshold} in the last {PATIENCE} epochs
        if last_loss - test_loss < loss_threshold:
            anger += 1
            # stop only once the minimum number of epochs has been reached
            if epoch >= NUM_MIN_EPOCHS_TASK and anger >= PATIENCE:
                break
        else:
            # sufficient improvement: reset the counter and move the reference loss
            anger = 0
            last_loss = test_loss
    
    # load back the weights from the best model
    model.load_state_dict(torch.load(f'./{test_name}_model_{exp_index}.pt'))
    
    exp_conf_matrices = []
    # test on all the test_tasks for the accuracy matrix
    for j, exp in tqdm(enumerate(benchmark.test_stream)):
        # note: test_dataset / test_loader from above are rebound here
        test_dataset = exp.dataset
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_TEST, shuffle=False, num_workers=2)

        test_loss, test_acc, conf_matrix = test(model, criterion, test_loader, device, return_confusion_matrix=True)
        accuracy_matrix[exp_index, j] = test_acc * 100 # stored as a percentage
        exp_conf_matrices.append(conf_matrix)
    
    confusion_matrices.append(exp_conf_matrices)

    # get subset for the current experience
    # (scored on experience.dataset only, i.e. without the replayed subsets)
    exp_subset = select_top_samples(model, device, experience.dataset, metric_used, mode_used, fraction_used, offset_used)
    subsets_memory.append(exp_subset)


In [None]:
## save acc matrix and train metrics at the end

def _save_array(file_stem, array, failure_message, show_on_failure=False):
    """Best-effort save of `array` to `./{test_name}_{file_stem}.npy`.

    A failure is reported instead of raised, so that one failed save does not
    prevent the remaining results from being written.

    Args:
        file_stem: suffix of the file name, placed after the `test_name` prefix.
        array: array handed to `np.save`.
        failure_message: line printed when saving fails.
        show_on_failure: when True, also print `array` on failure so that the
            values are at least kept in the cell output.
    """
    try:
        with open(f'./{test_name}_{file_stem}.npy', 'wb') as f:
            np.save(f, array)
    except Exception as exc:  # not a bare except: Ctrl-C / kernel interrupt still propagate
        print(failure_message)
        print(f'  reason: {exc!r}')
        if show_on_failure:
            print(array)


_save_array('acc_matrix', accuracy_matrix, "saving acc matrix failed", show_on_failure=True)

# The nested lists must be converted to an array first; this conversion can
# fail on its own (e.g. if the nested lists are ragged), so it is guarded too.
try:
    confusion_matrices = np.array(confusion_matrices, dtype=np.int32)
except Exception as exc:
    print("saving conf matrices failed")
    print(f'  reason: {exc!r}')
    print(confusion_matrices)
else:
    _save_array('conf_matrices', confusion_matrices, "saving conf matrices failed", show_on_failure=True)

_save_array('train_accs', train_accs, "saving train accs failed")
_save_array('train_losses', train_losses, "saving train_losses failed")
_save_array('test_accs', test_accs, "saving test accs failed", show_on_failure=True)
_save_array('test_losses', test_losses, "saving test losses failed")


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Tick labels: columns are the test experiences, rows the training time steps.
test_labels = [f'Test {t}' for t in range(1, 11)]
time_labels = [f'Time {t}' for t in range(1, 11)]

# Annotated heatmap of the accuracy matrix, one decimal per cell.
s = sns.heatmap(
    accuracy_matrix,
    annot=True,
    fmt='.1f',
    cmap='Blues',
    square=True,
    xticklabels=test_labels,
    yticklabels=time_labels,
)

# Save before show() so the figure is written while it is still current.
plt.savefig(f'./{test_name}_acc_matrix.jpg', bbox_inches='tight')
plt.show()
