In [1]:
from avalanche.benchmarks import SplitMNIST

benchmark = SplitMNIST(n_experiences=2)

train_stream = benchmark.train_stream
test_stream = benchmark.test_stream

for experience in train_stream: 
    print("Start of task ", experience.task_label)
    print('Classes in this task:', experience.classes_in_this_experience)

    current_training_set = experience.dataset
    print('Task {}'.format(experience.task_label))
    print('This task contains', len(current_training_set), 'training examples')

    current_test_set = test_stream[experience.current_experience].dataset
    print('This task contains', len(current_test_set), 'test examples')


  Referenced from: <5AA8DD3D-A2CC-31CA-8060-88B4E9C18B09> /Users/daniyarzakarin/miniconda3/envs/clenv/lib/python3.10/site-packages/torchvision/image.so
  warn(
  from .autonotebook import tqdm as notebook_tqdm


Start of task  0
Classes in this task: [0, 1, 2, 7, 8]
Task 0
This task contains 30739 training examples
This task contains 5149 test examples
Start of task  0
Classes in this task: [3, 4, 5, 6, 9]
Task 0
This task contains 29261 training examples
This task contains 4851 test examples


Defining the file logger

In [2]:
from avalanche.training.plugins import EvaluationPlugin
from avalanche.evaluation.metrics import forgetting_metrics, \
    accuracy_metrics, loss_metrics, timing_metrics, cpu_usage_metrics, \
    confusion_matrix_metrics, disk_usage_metrics, StreamConfusionMatrix
from avalanche.logging import InteractiveLogger

text_logger = InteractiveLogger()
eval_plugin = EvaluationPlugin(
    accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loss_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    forgetting_metrics(experience=True, stream=True),
    StreamConfusionMatrix(num_classes=benchmark.n_classes, save_image=False),
    loggers=[text_logger]
)

Models and Strategies

In [3]:
from avalanche.models import SimpleMLP
from avalanche.training import Naive, EWC
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

mlp_naive = SimpleMLP(num_classes=benchmark.n_classes)
naive_strategy = Naive(
    model = mlp_naive, 
    optimizer = SGD(mlp_naive.parameters(), lr=0.001, momentum=0.9),
    criterion = CrossEntropyLoss(), 
    train_mb_size=500, 
    train_epochs=10, 
    eval_mb_size=100,
    evaluator=eval_plugin)

mlp_ewc = SimpleMLP(num_classes=benchmark.n_classes)
ewc_strategy = EWC(
    model = mlp_ewc, 
    optimizer = SGD(mlp_ewc.parameters(), lr=0.001, momentum=0.9),
    criterion = CrossEntropyLoss(), 
    ewc_lambda = 2000,
    train_mb_size=500, 
    train_epochs=10, 
    eval_mb_size=100,
    evaluator=eval_plugin)



Training

In [4]:
results_naive = []
print('Starting experiment...')

for exp_id, experience in enumerate(train_stream):
    print("Start of experience ", experience.current_experience)

    naive_strategy.train(experience)
    print('Training completed')

    print('Computing accuracy on the current test set')
    results_naive.append(naive_strategy.eval(benchmark.test_stream[exp_id]))

print('Final evaluation...')
naive_strategy.eval(benchmark.test_stream)

Starting experiment...
Start of experience  0
-- >> Start of training phase << --
100%|██████████| 62/62 [00:05<00:00, 12.19it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.2072
	Loss_MB/train_phase/train_stream/Task000 = 0.4758
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.6856
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.9038
100%|██████████| 62/62 [00:04<00:00, 13.11it/s]
Epoch 1 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.3935
	Loss_MB/train_phase/train_stream/Task000 = 0.3483
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9033
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.8912
100%|██████████| 62/62 [00:05<00:00, 12.12it/s]
Epoch 2 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.2881
	Loss_MB/train_phase/train_stream/Task000 = 0.2399
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9202
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.9163
100%|██████████| 62/62 [00:04<00:00, 14.24it/s]
Epoch 3 ended.
	Loss_Epoch/

{'Top1_Acc_MB/train_phase/train_stream/Task000': 0.9272030651340997,
 'Loss_MB/train_phase/train_stream/Task000': 0.20209628343582153,
 'Top1_Acc_Epoch/train_phase/train_stream/Task000': 0.9190389938826424,
 'Loss_Epoch/train_phase/train_stream/Task000': 0.25599117917000264,
 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.009322198485142746,
 'Loss_Exp/eval_phase/test_stream/Task000/Exp000': 5.7928380825422785,
 'Top1_Acc_Stream/eval_phase/test_stream/Task000': 0.4572,
 'Loss_Stream/eval_phase/test_stream/Task000': 3.085521245234832,
 'StreamForgetting/eval_phase/test_stream': 0.9489221208001554,
 'ConfusionMatrix_Stream/eval_phase/test_stream': tensor([[  0,   0,   0,  35,   3, 879,  56,   0,   0,   7],
         [  0,  48,   0, 990,   2,  81,  13,   0,   0,   1],
         [  0,   0,   0, 632,  46,  31, 273,   0,   0,  50],
         [  0,   0,   0, 953,   3,  29,   4,   0,   0,  21],
         [  0,   0,   0,   2, 929,   2,  15,   0,   0,  34],
         [  0,   0,   0,  38,  12

In [14]:
results_ewc = []
print('Starting experiment...')

for exp_id, experience in enumerate(train_stream):
    print("Start of experience ", experience.current_experience)

    ewc_strategy.train(experience)
    print('Training completed')

    print('Computing accuracy on the current test set')
    results_ewc.append(ewc_strategy.eval(benchmark.test_stream[exp_id]))

print('Final evaluation...')
ewc_strategy.eval(benchmark.test_stream)

Starting experiment...
Start of experience  0
-- >> Start of training phase << --
100%|██████████| 61/61 [00:04<00:00, 13.46it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.2564
	Loss_MB/train_phase/train_stream/Task000 = 0.5641
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.6558
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.8842
100%|██████████| 61/61 [00:04<00:00, 13.94it/s]
Epoch 1 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.4232
	Loss_MB/train_phase/train_stream/Task000 = 0.3516
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.8949
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.9243
100%|██████████| 61/61 [00:04<00:00, 13.79it/s]
Epoch 2 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.3088
	Loss_MB/train_phase/train_stream/Task000 = 0.2859
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9129
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.9125
100%|██████████| 61/61 [00:04<00:00, 13.86it/s]
Epoch 3 ended.
	Loss_Epoch/

{'Top1_Acc_MB/train_phase/train_stream/Task000': 0.922077922077922,
 'Loss_MB/train_phase/train_stream/Task000': 0.21081849932670593,
 'Top1_Acc_Epoch/train_phase/train_stream/Task000': 0.9380599790377658,
 'Loss_Epoch/train_phase/train_stream/Task000': 0.23651596240989736,
 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.10818713450292397,
 'Loss_Exp/eval_phase/test_stream/Task000/Exp000': 3.9334067754578173,
 'Top1_Acc_Stream/eval_phase/test_stream/Task000': 0.5186,
 'Loss_Stream/eval_phase/test_stream/Task000': 2.0981782685741783,
 'StreamForgetting/eval_phase/test_stream': 0.849317738791423,
 'ConfusionMatrix_Stream/eval_phase/test_stream': tensor([[  0,   0,   0,  40,   1, 836,  96,   7,   0,   0],
         [  0, 497,   0, 468,   6,  37,  11, 116,   0,   0],
         [  0,   0,  58, 523,  65,  32, 304,  50,   0,   0],
         [  0,   0,   0, 955,   2,  23,   4,  26,   0,   0],
         [  0,   0,   0,   1, 955,   3,  16,   7,   0,   0],
         [  0,   0,   0,  43,  16, 

In [68]:
from json import load
from utilities import get_hessian_eigenvalues
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset, DataLoader
from torch.nn.utils import parameters_to_vector, vector_to_parameters
import torch.optim as optim

# Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        # self.fc1 = nn.Linear(input_size, hidden_size)
        # self.relu = nn.ReLU()
        # self.fc2 = nn.Linear(hidden_size, output_size)
        self.fc = nn.Linear(input_size, output_size, bias = False)

    def forward(self, x):
        # x = self.fc1(x)
        # x = self.relu(x)
        # x = self.fc2(x)
        x = self.fc(x)
        return x

# Initialize model, loss function, and optimizer
input_size = 10      # number of input features
hidden_size = 20     # number of hidden units
output_size = 1      # number of output units

model = MLP(input_size, hidden_size, output_size)
criterion = nn.MSELoss()           # Mean Squared Error Loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dummy data for demonstration
inputs = torch.randn(64, input_size)   # 64 samples, each with `input_size` features
targets = torch.randn(64, output_size) # Corresponding targets
learning_rate = 0.01
# Training loop
num_epochs = 100
dataset = TensorDataset(inputs, targets)
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    evals, evecs = get_hessian_eigenvalues(model, criterion, dataset, physical_batch_size=64)
    evecs.transpose_(1, 0)
    # evecs = torch.transpose(evecs)
    print(f'evecs.shape : {evecs.shape}')
    # print(f'proj_tensor: {proj_tensor.shape}')

    # Backward pass
    # loss.backward()  # Compute gradients
    
    # Manually update weights
    with torch.no_grad():  # Ensure we don’t track these operations for gradient computation
        grad = torch.autograd.grad(loss, inputs=model.parameters(), create_graph=True)
        vec_grad = parameters_to_vector(grad)
        print(f'vec_grad.shape: {vec_grad.shape}')
        step = torch.Tensor(vec_grad.shape)
        for vec in evecs:
            step -= learning_rate * torch.dot(vec_grad, vec) * vec  # Update each parameter by gradient descent
        vec_params = parameters_to_vector(model.parameters())
        vec_params += step
        vector_to_parameters(vec_params, model.parameters())
        model.zero_grad()
    
    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}]')


# 
# loader = DataLoader(dataset, batch_size=64, shuffle=False)
# for kek, lol in loader:
#     print(kek.shape, lol.shape)
# 

evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
Epoch [10/100]
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shape: torch.Size([10])
evecs.shape : torch.Size([6, 10])
vec_grad.shap

In [None]:
def projected_step():
    evals, evecs = get_hessian_eigenvalues(model, criterion, dataset, physical_batch_size=64)
    evecs.transpose_(1, 0)
    # evecs = torch.transpose(evecs)
    print(f'evecs.shape : {evecs.shape}')
    # print(f'proj_tensor: {proj_tensor.shape}')

    # Backward pass
    # loss.backward()  # Compute gradients
    
    # Manually update weights
    with torch.no_grad():  # Ensure we don’t track these operations for gradient computation
        grad = torch.autograd.grad(loss, inputs=model.parameters(), create_graph=True)
        vec_grad = parameters_to_vector(grad)
        print(f'vec_grad.shape: {vec_grad.shape}')
        step = torch.Tensor(vec_grad.shape)
        for vec in evecs:
            step -= learning_rate * torch.dot(vec_grad, vec) * vec  # Update each parameter by gradient descent
        vec_params = parameters_to_vector(model.parameters())
        vec_params += step
        vector_to_parameters(vec_params, model.parameters())
        model.zero_grad()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, Subset
import numpy as np

# Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the image to a 784-dimensional vector
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Hyperparameters
input_size = 28 * 28  # MNIST images are 28x28 pixels
hidden_size = 128     # Number of units in the hidden layer
output_size = 10      # Number of classes for MNIST digits (0–9)
batch_size = 64
learning_rate = 0.001
num_epochs = 5


# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
full_train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)

# Create a subset of 10% of the training dataset
train_size = int(0.1 * len(full_train_dataset))  # 10% of the data
indices = np.random.choice(len(full_train_dataset), train_size, replace=False)
train_dataset = Subset(full_train_dataset, indices)

# Use the entire test dataset for evaluation
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model, loss function, and optimizer
model = MLP(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(num_epochs):
    sum_loss = torch.tensor(0.)
    for images, labels in train_loader:
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        sum_loss += loss
    print(f'On epoch = {epoch} loss = {sum_loss}')
    evals, evecs = get_hessian_eigenvalues(model, criterion, train_dataset, physical_batch_size=64)
    evecs.transpose_(1, 0)
    # Manually update weights
    with torch.no_grad():  # Ensure we don’t track these operations for gradient computation
        grad = torch.autograd.grad(loss, inputs=model.parameters(), create_graph=True)
        vec_grad = parameters_to_vector(grad)
        print(f'vec_grad.shape: {vec_grad.shape}')
        step = torch.Tensor(vec_grad.shape)
        for vec in evecs:
            step -= learning_rate * torch.dot(vec_grad, vec) * vec  # Update each parameter by gradient descent
        vec_params = parameters_to_vector(model.parameters())
        vec_params += step
        vector_to_parameters(vec_params, model.parameters())
        model.zero_grad()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Testing loop
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient computation
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy on the test set: {100 * correct / total:.2f}%')

On epoch = 0 loss = 216.1085662841797


In [2]:
print(evecs.shape)

torch.Size([241, 6])
