In [298]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, cohen_kappa_score
from PIL import Image
import random
import torch
from torch import autograd
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

# Define the transform for MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


# Set random seeds for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Load and transform the datasets
transform = transforms.Compose([transforms.ToTensor()])

# Standard MNIST for Task A (Digit Classification)
train_A_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_A_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Function to create a balanced subset
def create_balanced_subset(dataset, num_samples_per_class):
    class_indices = {i: [] for i in range(10)}  # Store indices for each class (0-9)

    # Sort dataset into classes
    for idx, (_, target) in enumerate(dataset):
        class_indices[target].append(idx)

    # Randomly select num_samples_per_class indices per class
    subset_indices = []
    for class_label, indices in class_indices.items():
        subset_indices.extend(np.random.choice(indices, num_samples_per_class, replace=False))

    # Create the subset
    subset = Subset(dataset, subset_indices)
    return subset

# Create a subset with 100 samples per digit
train_A_dataset = create_balanced_subset(train_A_dataset, num_samples_per_class=1500)
test_A_dataset = create_balanced_subset(test_A_dataset, num_samples_per_class=150)



# Custom dataset for Task B (Even-Odd Classification)
class EvenOddMNIST(Dataset):
    def __init__(self, mnist_dataset):
        self.mnist_dataset = mnist_dataset

    def __getitem__(self, index):
        img, target = self.mnist_dataset[index]
        even_odd_target = target % 2  # 0 for even, 1 for odd
        return img, even_odd_target

    def __len__(self):
        return len(self.mnist_dataset)

train_B_dataset = EvenOddMNIST(train_A_dataset)
test_B_dataset = EvenOddMNIST(test_A_dataset)

class TaskIdentifierDataset(Dataset):
    def __init__(self, dataset, task_id):
        self.dataset = dataset
        self.task_id = task_id

    def __getitem__(self, index):
        img, target = self.dataset[index]
        task_identifier = torch.tensor([self.task_id], dtype=torch.float32)
        return img, target, task_identifier

    def __len__(self):
        return len(self.dataset)

# Wrapping the original datasets with task identifiers
train_A_dataset_with_id = TaskIdentifierDataset(train_A_dataset, 1.0)
test_A_dataset_with_id = TaskIdentifierDataset(test_A_dataset, 1.0)
train_B_dataset_with_id = TaskIdentifierDataset(train_B_dataset, 0.0)
test_B_dataset_with_id = TaskIdentifierDataset(test_B_dataset, 0.0)

# Split into training and validation sets for Task A and Task B
train_size_A = int(0.8 * len(train_A_dataset_with_id))  # 80% for training
val_size_A = len(train_A_dataset_with_id) - train_size_A  # 20% for validation

train_size_B = int(0.8 * len(train_B_dataset_with_id))  # 80% for training
val_size_B = len(train_B_dataset_with_id) - train_size_B  # 20% for validation

train_A_dataset_with_id, val_A_dataset_with_id = random_split(train_A_dataset_with_id, [train_size_A, val_size_A])
train_B_dataset_with_id, val_B_dataset_with_id = random_split(train_B_dataset_with_id, [train_size_B, val_size_B])

# Create DataLoaders
train_A_loader = DataLoader(train_A_dataset_with_id, batch_size=64, shuffle=True)
val_A_loader = DataLoader(val_A_dataset_with_id, batch_size=64, shuffle=False)
test_A_loader = DataLoader(test_A_dataset_with_id, batch_size=64, shuffle=False)

train_B_loader = DataLoader(train_B_dataset_with_id, batch_size=64, shuffle=True)
val_B_loader = DataLoader(val_B_dataset_with_id, batch_size=64, shuffle=False)
test_B_loader = DataLoader(test_B_dataset_with_id, batch_size=64, shuffle=False)


In [299]:
# Check the size of the subset
print(f"train_A_dataset size: {len(train_A_dataset)}")
print(f"train_B_dataset size: {len(train_B_dataset)}")
print(f"test_B_dataset size: {len(test_B_dataset)}")
print(f"test_A_dataset size: {len(test_A_dataset)}")

train_A_dataset size: 15000
train_B_dataset size: 15000
test_B_dataset size: 1500
test_A_dataset size: 1500


In [300]:
class ConvNet(nn.Module):
    def __init__(self, shared_dim=128, output_dim_A=10, output_dim_B=2):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 28 * 28 + 1, shared_dim)  # Adjusted to match the output size after conv layers + task identifier
        self.fc2_A = nn.Linear(shared_dim, output_dim_A)
        self.fc2_B = nn.Linear(shared_dim, output_dim_B)

    def forward(self, x, task_id):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        task_id = task_id.view(-1, 1)  # Ensure task_id has shape (batch_size, 1)
        x = torch.cat((x, task_id), dim=1)  # Concatenate task identifier
        x = F.relu(self.fc1(x))

        # Generate outputs for both tasks
        output_A = self.fc2_A(x)
        output_B = self.fc2_B(x)

        # Combine the outputs into a single tensor with proper shape
        output = torch.zeros(x.size(0), max(output_A.size(1), output_B.size(1)), device=x.device)
        output[:, :output_A.size(1)] = output_A * (task_id == 1).float()
        output[:, :output_B.size(1)] += output_B * (task_id == 0).float()

        return output


In [301]:
# Training function
def train(model, loader, optimizer, criterion, epochs=20):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for data, target, task_id in loader:  # Ensure the DataLoader returns three items
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(loader)}')

# Evaluation function
def evaluate(model, loader, criterion):
    model.eval()
    total_loss = 0
    all_targets = []
    all_predictions = []
    with torch.no_grad():
        for data, target, task_id in loader:  # Ensure the DataLoader returns three items
            output = model(data, task_id)
            loss = criterion(output, target)
            total_loss += loss.item()
            predictions = torch.argmax(output, dim=1)
            all_targets.extend(target.numpy())
            all_predictions.extend(predictions.numpy())

    accuracy = accuracy_score(all_targets, all_predictions)
    precision = precision_score(all_targets, all_predictions, average='weighted', zero_division=0)
    recall = recall_score(all_targets, all_predictions, average='weighted', zero_division=0)
    kappa = cohen_kappa_score(all_targets, all_predictions)
    
    return accuracy, precision, recall, kappa


In [302]:
def train_with_patience(model, loader, optimizer, criterion, epochs=20, patience=5):
    model.train()
    best_loss = float('inf')
    no_improvement = 0
    epoch_accuracy = []
    epochs_loss = []

    for epoch in range(epochs):
        epoch_loss = 0
        correct = 0
        total = 0
        for data, target, task_id in loader:  # Ensure the DataLoader returns three items
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            _, predicted = torch.max(output, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

        avg_loss = epoch_loss / len(loader)
        accuracy = correct / total
        epoch_accuracy.append(accuracy)
        epochs_loss.append(avg_loss)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

        # Check for improvement
        if avg_loss < best_loss:
            best_loss = avg_loss
            no_improvement = 0
        else:
            no_improvement += 1

        # Early stopping criteria
        if no_improvement >= patience:
            print(f"Stopping early due to no improvement in loss for {patience} consecutive epochs.")
            break

    return epoch_accuracy, epochs_loss


In [303]:
import torch
import torch.nn.functional as F

class EWC:
    def __init__(self, model, dataloader, importance=10000):
        self.model = model
        self.dataloader = dataloader
        self.importance = importance
        self.initial_params = {n: p.clone() for n, p in self.model.named_parameters()}
        self.fisher_diagonal = self.compute_fisher_information()

    def compute_fisher_information(self):
        fisher_diagonal = {n: torch.zeros_like(p) for n, p in self.model.named_parameters()}
        self.model.eval()

        for i, (data, target, task_id) in enumerate(self.dataloader, 1):
            self.model.zero_grad()
            output = self.model(data, task_id)
            loss = F.nll_loss(F.log_softmax(output, dim=1), target)
            loss.backward()

            for n, p in self.model.named_parameters():
                if p.grad is not None:
                    grad_squared = p.grad ** 2
                    # Online update for Fisher diagonal using incremental average
                    fisher_diagonal[n] = fisher_diagonal[n] + (grad_squared - fisher_diagonal[n]) / i
                else:
                    print(f"Warning: Gradient for parameter {n} is None.")

        return fisher_diagonal

    def penalty(self):
        penalty = 0
        for n, p in self.model.named_parameters():
            penalty += (self.fisher_diagonal[n] * (p - self.initial_params[n]) ** 2).sum()
        return self.importance * penalty


In [304]:
def train_with_ewc(model, loader, ewc, optimizer, criterion, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        total_penalty = 0
        for data, target, task_id in loader:
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            penalty = ewc.penalty()
            total_loss += loss.item()
            total_penalty += penalty.item()
            loss += penalty
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(loader):.4f}, Penalty: {total_penalty / len(loader):.4f}')


In [305]:
def train_with_ewc_with_patience(model, loader, ewc, optimizer, criterion, epochs=20, patience=5):
    model.train()
    best_loss = float('inf')
    no_improvement = 0
    epoch_accuracy = []
    epochs_loss = []

    for epoch in range(epochs):
        total_loss = 0
        total_penalty = 0
        total = 0
        correct = 0
        for data, target, task_id in loader:
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            penalty = ewc.penalty()
            total_loss += loss.item()
            total_penalty += penalty.item()
            loss += penalty
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(output, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

        avg_loss = total_loss / len(loader)
        avg_penalty = total_penalty / len(loader)
        accuracy = correct / total
        epoch_accuracy.append(accuracy)
        epochs_loss.append(avg_loss)
        
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}, Penalty: {avg_penalty:.4f}')

        # Check for improvement
        if avg_loss < best_loss:
            best_loss = avg_loss
            no_improvement = 0
        else:
            no_improvement += 1

        # Early stopping criteria
        if no_improvement >= patience:
            print(f"Stopping early due to no improvement in loss for {patience} consecutive epochs.")
            break


    return epoch_accuracy, epochs_loss


In [306]:
class SynapticIntelligence:
    def __init__(self, model, dataloader, importance=1000, device='cpu'):
        self.model = model
        self.dataloader = dataloader
        self.importance = importance
        self.device = device
        self.saved_params = {}
        self.omega = {}

        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.saved_params[n] = p.clone().detach().to(self.device)
                self.omega[n] = torch.zeros_like(p).to(self.device)

    def update_omega(self, batch_loss, lr):
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                if p.grad is not None:
                    self.omega[n] += p.grad * (p.detach() - self.saved_params[n])
                    self.saved_params[n] = p.clone().detach()

    def penalty(self):
        loss = 0
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                _loss = self.omega[n] * (p - self.saved_params[n]) ** 2
                loss += _loss.sum()
        return self.importance * loss

    def end_task(self):
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.omega[n] /= len(self.dataloader)


In [307]:
def train_with_si(model, loader, si, optimizer, criterion, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data, target, task_id in loader:
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            loss += si.penalty()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            si.update_omega(loss, optimizer.param_groups[0]['lr'])
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(loader):.4f}')
    si.end_task()


In [308]:
def train_with_si(model, loader, optimizer, criterion, si, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        total_penalty = 0
        for data, target,task_id in loader:
            optimizer.zero_grad()
            output = model(data,task_id)
            loss = criterion(output, target)
            penalty = si.penalty()
            total_loss += loss.item()
            total_penalty += penalty.item()
            loss += penalty
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(loader)}, Penalty: {total_penalty / len(loader)}')

In [319]:
def train_with_si(model, optimizer, dataloaders, si=None, lambda_si=0.1, epochs=5):
    """
    Train the model with or without SI penalty.

    Args:
        model: Neural network model to train.
        optimizer: Optimizer for training.
        dataloaders: Dictionary containing 'train' and optionally 'val' dataloaders.
        si: Instance of the SynapticIntelligence class, or None if not using SI.
        lambda_si: Importance of the SI penalty (default is 0.1).
        epochs: Number of epochs to train.
    """
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        total_penalty = 0.0

        for inputs, targets,task_id in dataloaders['train']:
            optimizer.zero_grad()
            outputs = model(inputs,task_id)
            loss = criterion(outputs, targets)

            # Add SI penalty if applicable
            if si is not None:
                si_penalty = si.penalty(model)
                loss += lambda_si * si_penalty
                total_penalty += si_penalty.item()

            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Log losses
        avg_loss = running_loss / len(dataloaders['train'])
        avg_penalty = total_penalty / len(dataloaders['train'])
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Penalty: {avg_penalty:.4f}")


In [323]:
class SynapticIntelligence:
    def __init__(self, model, dataloader):
        """
        Synaptic Intelligence (SI) implementation.
        Args:
            model: Neural network model.
            dataloader: DataLoader for the current task.
        """
        self.model = model
        self.dataloader = dataloader
        self.omega = {}
        self.optimal_params = {}

        # Initialize omega and optimal_params
        self._initialize_params()

    def _initialize_params(self):
        """
        Initialize or reset omega and optimal_params to match the current model's parameters.
        """
        self.omega = {n: torch.zeros_like(p) for n, p in self.model.named_parameters() if p.requires_grad}
        self.optimal_params = {n: p.clone().detach() for n, p in self.model.named_parameters() if p.requires_grad}

    def update_omega(self):
        self.model.eval()
        fisher_diagonal = {n: torch.zeros_like(p) for n, p in self.model.named_parameters() if p.requires_grad}
        
        dataset_size = len(self.dataloader.dataset)  # Dataset size for normalization
        for i, (data, target,task_id) in enumerate(self.dataloader, 1):
            self.model.zero_grad()
            output = self.model(data,task_id)
            loss = F.nll_loss(F.log_softmax(output, dim=1), target)
            loss.backward()
            
            for n, p in self.model.named_parameters():
                if p.grad is not None:
                    fisher_diagonal[n] += (p.grad ** 2 - fisher_diagonal[n]) / i
        
        for n in fisher_diagonal:
            if n in self.omega:
                self.omega[n] += fisher_diagonal[n] / max(1e-10, fisher_diagonal[n].norm().item())
            else:
                self.omega[n] = fisher_diagonal[n] / max(1e-10, fisher_diagonal[n].norm().item())




    def store_optimal_params(self):
        """
        Store the current model parameters as the optimal parameters after training a task.
        """
        self.optimal_params = {n: p.clone().detach() for n, p in self.model.named_parameters() if p.requires_grad}

    def penalty(self, model):
        penalty = 0.0
        for n, p in model.named_parameters():
            if n in self.omega and n in self.optimal_params:
                diff = p - self.optimal_params[n]
                #print(f"{n} - Diff Max: {diff.max().item()}, Min: {diff.min().item()}")
                term = (self.omega[n] * diff ** 2).sum()
                #print(f"{n} - Term: {term.item()}")
                penalty += term
        return penalty



In [310]:
class RehearsalBuffer:
    def __init__(self, buffer_size=200):
        self.buffer_size = buffer_size
        self.buffer = []

    def add_to_buffer(self, data, target, task_id):
        for i in range(len(data)):
            if len(self.buffer) >= self.buffer_size:
                self.buffer.pop(0)
            self.buffer.append((data[i], target[i], task_id[i]))

    def get_buffer(self):
        data, target, task_id = zip(*self.buffer)
        return torch.stack(data), torch.tensor(target), torch.tensor(task_id)


In [311]:
def train_with_rehearsal(model, loader, buffer, optimizer, criterion, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data, target, task_id in loader:
            buffer.add_to_buffer(data, target, task_id)
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            if len(buffer.buffer) > 0:
                buffer_data, buffer_target, buffer_task_id = buffer.get_buffer()
                buffer_output = model(buffer_data, buffer_task_id)
                loss += criterion(buffer_output, buffer_target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(loader):.4f}')


In [312]:
class PNN(nn.Module):
    def __init__(self, shared_dim=128, output_dim_A=10, output_dim_B=2):
        super(PNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1_A = nn.Linear(64 * 28 * 28, shared_dim)
        self.fc2_A = nn.Linear(shared_dim, output_dim_A)
        self.fc1_B = nn.Linear(64 * 28 * 28, shared_dim)
        self.fc2_B = nn.Linear(shared_dim, output_dim_B)

    def forward(self, x, task_id):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        if task_id[0] == 1:
            x = F.relu(self.fc1_A(x))
            output = self.fc2_A(x)
        else:
            x = F.relu(self.fc1_B(x))
            output = self.fc2_B(x)
        return output


In [313]:
def train_pnn(model, loader, optimizer, criterion, task_num, epochs=50):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data, target, task_id in loader:
            optimizer.zero_grad()
            output = model(data, task_id)
            loss = criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(loader):.4f}')

def evaluate_pnn(model, loader, criterion, task_num):
    model.eval()
    total_loss = 0
    all_targets = []
    all_predictions = []
    with torch.no_grad():
        for data, target, task_id in loader:
            output = model(data, task_id)
            loss = criterion(output, target)
            total_loss += loss.item()
            predictions = torch.argmax(output, dim=1)
            all_targets.extend(target.numpy())
            all_predictions.extend(predictions.numpy())

    accuracy = accuracy_score(all_targets, all_predictions)
    precision = precision_score(all_targets, all_predictions, average='weighted', zero_division=0)
    recall = recall_score(all_targets, all_predictions, average='weighted', zero_division=0)
    kappa = cohen_kappa_score(all_targets, all_predictions)
    
    return accuracy, precision, recall, kappa


In [314]:
epoch_stats_other = {
    "si": {"acc_B": [], "loss_B": [], "task_A_during_B":[],"task_A_during_B_loss":[],"val_task_A_during_B":[],"val_acc_B":[],"A_during_A":[],"B_during_A":[],"A_during_A_val":[],"B_during_A_val":[]},
    "pnn": {"acc_B": [], "loss_B": [], "task_A_during_B":[],"task_A_during_B_loss":[],"val_task_A_during_B":[],"val_acc_B":[],"A_during_A":[],"B_during_A":[],"A_during_A_val":[],"B_during_A_val":[]},
    "reh": {"acc_B": [], "loss_B": [], "task_A_during_B":[],"task_A_during_B_loss":[],"val_task_A_during_B":[],"val_acc_B":[],"A_during_A":[],"B_during_A":[],"A_during_A_val":[],"B_during_A_val":[]}
}

num_runs = 10
epochs = 100

In [327]:
for key in epoch_stats_other["si"]:
    epoch_stats_other["si"][key] = []

In [328]:
# Initialize models, optimizers, and criteria
shared_dim = 128
output_dim_A = 10
output_dim_B = 2
lr = 0.00001
criterion = nn.CrossEntropyLoss()
import json


num_runs = 2
epochs = 50
for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # SI
    model_si = ConvNet(shared_dim=shared_dim, output_dim_A=output_dim_A, output_dim_B=output_dim_B)
    optimizer_si = optim.Adam(model_si.parameters(), lr=lr)
    

    A_during_A_si_run = []
    A_during_A_si_run_val = []
    B_during_A_si_run = []
    B_during_A_si_run_val = []
    
    #model_si.load_state_dict(weights)

    si = SynapticIntelligence(model_si, train_A_loader)
    
    # Train and evaluate with SI
    print("Training with SI on Task A")
    for epoch in range(25):
        train_with_si(model_si, optimizer_si, {'train': train_A_loader, 'val': val_A_loader}, si=None, lambda_si=0, epochs=1)

        # Evaluate on test set for Domain B
        accuracy_taskB_during_A_si, _, _, _ = evaluate(model_si, test_B_loader, criterion)
        B_during_A_si_run.append(accuracy_taskB_during_A_si)

        # Evaluate on validation set for Domain B
        accuracy_taskB_during_A_si_val, _, _, _ = evaluate(model_si, val_B_loader, criterion)
        B_during_A_si_run_val.append(accuracy_taskB_during_A_si_val)

        # Evaluate performance on Domain A during training on Domain B (test and validation)
        accuracy_A_during_A_si, _, _, _ = evaluate(model_si, test_A_loader, criterion)
        A_during_A_si_run.append(accuracy_A_during_A_si)

        accuracy_A_during_A_si_val, _, _, _ = evaluate(model_si, val_A_loader, criterion)
        A_during_A_si_run_val.append(accuracy_A_during_A_si_val)

    epoch_stats_other["si"]["A_during_A"].append(A_during_A_si_run)
    epoch_stats_other["si"]["B_during_A"].append(B_during_A_si_run)

    # Store validation metrics for each run
    epoch_stats_other["si"]["A_during_A_val"].append(A_during_A_si_run_val)
    epoch_stats_other["si"]["B_during_A_val"].append(B_during_A_si_run_val)

    print("Evaluating on Task A with SI")
    accuracy_taskA_si, precision_taskA_si, recall_taskA_si, kappa_taskA_si = evaluate(model_si, test_A_loader, criterion)
    print(accuracy_taskA_si)
    
    task_A_during_B_si_run = []
    task_B_si_run = []
    val_A_during_B_si_run = []
    val_B_si_run = []
    
    
    si.update_omega()
    si.store_optimal_params()
    
    for epoch in range(epochs):
        train_with_si(model_si, optimizer_si, {'train': train_B_loader, 'val': val_B_loader}, si=si, lambda_si=2, epochs=1)

        # Evaluate on test set for Domain B
        accuracy_taskB_during_si, _, _, _ = evaluate(model_si, test_B_loader, criterion)
        task_B_si_run.append(accuracy_taskB_during_si)
        print(accuracy_taskB_during_si)

        # Evaluate on validation set for Domain B
        accuracy_taskB_during_si_val, _, _, _ = evaluate(model_si, val_B_loader, criterion)
        val_B_si_run.append(accuracy_taskB_during_si_val)

        # Evaluate performance on Domain A during training on Domain B (test and validation)
        accuracy_A_during_taskB_during_si, _, _, _ = evaluate(model_si, test_A_loader, criterion)
        task_A_during_B_si_run.append(accuracy_A_during_taskB_during_si)

        accuracy_A_during_taskB_during_si_val, _, _, _ = evaluate(model_si, val_A_loader, criterion)
        val_A_during_B_si_run.append(accuracy_A_during_taskB_during_si_val)
        print(accuracy_A_during_taskB_during_si_val)

    epoch_stats_other["si"]["task_A_during_B"].append(task_A_during_B_si_run)
    epoch_stats_other["si"]["acc_B"].append(task_B_si_run)

    # Store validation metrics for each run
    epoch_stats_other["si"]["val_task_A_during_B"].append(val_A_during_B_si_run)
    epoch_stats_other["si"]["val_acc_B"].append(val_B_si_run)
        
    print("Evaluating on Task B with SI")
    accuracy_taskB_si, precision_taskB_si, recall_taskB_si, kappa_taskB_si = evaluate(model_si, test_B_loader, criterion)
    accuracy_taskB_si_val, precision_taskB_si_val, recall_taskB_si_val, kappa_taskB_si_val = evaluate(model_si, val_B_loader, criterion)
    
    print("Evaluating on Task A after Task B with SI")
    accuracy_taskA_final_si, precision_taskA_final_si, recall_taskA_final_si, kappa_taskA_final_si = evaluate(model_si, test_A_loader, criterion)
    accuracy_taskA_final_si_val, precision_taskA_final_si_val, recall_taskA_final_si_val, kappa_taskA_final_si_val = evaluate(model_si, val_A_loader, criterion)
    
    with open('epoch_stats_other_til_bench_SI_final_test.json', 'w') as f:
        json.dump(epoch_stats_other, f, indent=4)


Run 1/2
Training with SI on Task A
Epoch 1/1, Loss: 1.8949, Penalty: 0.0000
Epoch 1/1, Loss: 0.9789, Penalty: 0.0000
Epoch 1/1, Loss: 0.5907, Penalty: 0.0000
Epoch 1/1, Loss: 0.4593, Penalty: 0.0000
Epoch 1/1, Loss: 0.3959, Penalty: 0.0000
Epoch 1/1, Loss: 0.3561, Penalty: 0.0000
Epoch 1/1, Loss: 0.3298, Penalty: 0.0000
Epoch 1/1, Loss: 0.3096, Penalty: 0.0000
Epoch 1/1, Loss: 0.2946, Penalty: 0.0000
Epoch 1/1, Loss: 0.2801, Penalty: 0.0000
Epoch 1/1, Loss: 0.2674, Penalty: 0.0000
Epoch 1/1, Loss: 0.2557, Penalty: 0.0000
Epoch 1/1, Loss: 0.2463, Penalty: 0.0000
Epoch 1/1, Loss: 0.2372, Penalty: 0.0000
Epoch 1/1, Loss: 0.2278, Penalty: 0.0000
Epoch 1/1, Loss: 0.2200, Penalty: 0.0000
Epoch 1/1, Loss: 0.2107, Penalty: 0.0000
Epoch 1/1, Loss: 0.2041, Penalty: 0.0000
Epoch 1/1, Loss: 0.1970, Penalty: 0.0000
Epoch 1/1, Loss: 0.1897, Penalty: 0.0000
Epoch 1/1, Loss: 0.1820, Penalty: 0.0000
Epoch 1/1, Loss: 0.1751, Penalty: 0.0000
Epoch 1/1, Loss: 0.1690, Penalty: 0.0000
Epoch 1/1, Loss: 0.162

In [None]:
shared_dim = 128
output_dim_A = 10
output_dim_B = 2
epochs = 100
num_runs = 10
for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # Rehearsal
    model_rehearsal = ConvNet(shared_dim=shared_dim, output_dim_A=output_dim_A, output_dim_B=output_dim_B)
    optimizer_rehearsal = optim.Adam(model_rehearsal.parameters(), lr=lr)
    model_rehearsal = RehearsalBuffer(buffer_size=2000)

    #model_si.load_state_dict(torch.load('C:/Users/gslax/OneDrive/Desktop/Wits/MastersResearchReport/TIL/Data/Bench/taskA_model.pth'))
    
    # Train and evaluate with Rehearsal
    print("Training with Rehearsal on Task A")
    #train(model_rehearsal, train_A_loader, optimizer_rehearsal, criterion, epochs=epochs)
    print("Evaluating on Task A with Rehearsal")
    accuracy_taskA_rehearsal, precision_taskA_rehearsal, recall_taskA_rehearsal, kappa_taskA_rehearsal = evaluate(model_rehearsal, test_A_loader, criterion)
    
    print("Training with Rehearsal on Task B")
    task_A_during_B_reh_run = []
    task_B_reh_run = []
    val_A_during_B_reh_run = []
    val_B_reh_run = []
    
    print("Training with Rehearsal on Task B")
    for epoch in range(epochs):
        train_with_rehearsal(model_rehearsal, train_B_loader, buffer, optimizer_rehearsal, criterion, epochs=1)
        
        # Evaluate on test set for Domain B
        accuracy_taskB_during_reh, _, _, _ = evaluate(model_rehearsal, test_B_loader, criterion)
        task_B_reh_run.append(accuracy_taskB_during_reh)

        # Evaluate on validation set for Domain B
        accuracy_taskB_during_reh_val, _, _, _ = evaluate(model_rehearsal, val_B_loader, criterion)
        val_B_reh_run.append(accuracy_taskB_during_reh_val)

        # Evaluate performance on Domain A during training on Domain B (test and validation)
        accuracy_A_during_taskB_during_reh, _, _, _ = evaluate(model_rehearsal, test_A_loader, criterion)
        task_A_during_B_reh_run.append(accuracy_A_during_taskB_during_reh)

        accuracy_A_during_taskB_during_reh_val, _, _, _ = evaluate(model_rehearsal, val_A_loader, criterion)
        val_A_during_B_reh_run.append(accuracy_A_during_taskB_during_reh_val)

    epoch_stats_other["reh"]["task_A_during_B"].append(task_A_during_B_reh_run)
    epoch_stats_other["reh"]["acc_B"].append(task_B_reh_run)

    # Store validation metrics for each run
    epoch_stats_other["reh"]["val_task_A_during_B"].append(val_A_during_B_reh_run)
    epoch_stats_other["reh"]["val_acc_B"].append(val_B_reh_run)
    
    
    print("Evaluating on Task B with Rehearsal")
    accuracy_taskB_rehearsal, precision_taskB_rehearsal, recall_taskB_rehearsal, kappa_taskB_rehearsal = evaluate(model_rehearsal, test_B_loader, criterion)
    accuracy_taskB_rehearsal_val, precision_taskB_rehearsal_val, recall_taskB_rehearsal_val, kappa_taskB_rehearsal_val = evaluate(model_rehearsal, val_B_loader, criterion)

    
    print("Evaluating on Task A after Task B with Rehearsal")
    accuracy_taskA_final_rehearsal, precision_taskA_final_rehearsal, recall_taskA_final_rehearsal, kappa_taskA_final_rehearsal = evaluate(model_rehearsal, test_B_loader, criterion)
    accuracy_taskA_final_rehearsal_val, precision_taskA_final_rehearsal_val, recall_taskA_final_rehearsal_val, kappa_taskA_final_rehearsal_val = evaluate(model_rehearsal, val_B_loader, criterion)

    with open('epoch_stats_other_REH_bench.json', 'w') as f:
        json.dump(epoch_stats_other, f, indent=4)



In [None]:
shared_dim = 128
output_dim_A = 10
output_dim_B = 2
criterion = nn.CrossEntropyLoss()
import json
num_runs = 10
epochs = 100
for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")

    # Initialize the PNN model
    lr = 0.0001
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_pnn = PNN(shared_dim=shared_dim, output_dim_A=output_dim_A, output_dim_B=output_dim_B).to(device)
    
    # Optimizers for tasks A and B
    optimizer_pnn_A = optim.Adam(list(model_pnn.fc1_A.parameters()) + list(model_pnn.fc2_A.parameters()), lr=lr)
    optimizer_pnn_B = optim.Adam(list(model_pnn.fc1_B.parameters()) + list(model_pnn.fc2_B.parameters()), lr=lr)


    task_B_during_A_pnn_run = []
    task_A_during_A_pnn_run = []
    
    # Train and evaluate on Task A
    print("Training with PNN on Task A")
    for epoch in range(25):
        train_pnn(model_pnn, train_A_loader, optimizer_pnn_A, criterion, task_num=0, epochs=1)
        # Evaluate on test set for Domain B
        accuracy_taskA_during_A, _, _, _ = evaluate_pnn(model_pnn, test_A_loader, criterion, task_num=0)
        task_A_during_A_pnn_run.append(accuracy_taskA_during_A)

        # Evaluate on validation set for Domain B
        accuracy_taskB_during_A, _, _, _ = evaluate_pnn(model_pnn, test_B_loader, criterion, task_num=1)
        task_B_during_A_pnn_run.append(accuracy_taskB_during_A)

    epoch_stats_other["pnn"]["A_during_A"].append(task_A_during_A_pnn_run)
    epoch_stats_other["pnn"]["B_during_A"].append(task_B_during_A_pnn_run)

    
    print("Evaluating on Task A with PNN")
    accuracy_taskA_pnn, precision_taskA_pnn, recall_taskA_pnn, kappa_taskA_pnn = evaluate_pnn(model_pnn, test_A_loader, criterion, task_num=0)
    print("Evaluating on Task A with PNN - validation Data")
    accuracy_taskA_pnn, precision_taskA_pnn, recall_taskA_pnn, kappa_taskA_pnn = evaluate_pnn(model_pnn, val_A_loader, criterion, task_num=0)

    
    # Train and evaluate on Task B
    print("Training with PNN on Task B")
    task_A_during_B_pnn_run = []
    task_B_pnn_run = []
    val_A_during_B_pnn_run = []
    val_B_pnn_run = []
    
    for epoch in range(epochs):
        train_pnn(model_pnn, train_B_loader, optimizer_pnn_B, criterion, task_num=1, epochs=1)
        
        # Evaluate on test set for Domain B
        accuracy_taskB_during_pnn, _, _, _ = evaluate_pnn(model_pnn, test_B_loader, criterion, task_num=1)
        task_B_pnn_run.append(accuracy_taskB_during_pnn)

        # Evaluate on validation set for Domain B
        accuracy_taskB_during_pnn_val, _, _, _ = evaluate_pnn(model_pnn, val_B_loader, criterion, task_num=1)
        val_B_pnn_run.append(accuracy_taskB_during_pnn_val)

        # Evaluate performance on Domain A during training on Domain B (test and validation)
        accuracy_A_during_taskB_during_pnn, _, _, _ = evaluate_pnn(model_pnn, test_A_loader, criterion, task_num=0)
        task_A_during_B_pnn_run.append(accuracy_A_during_taskB_during_pnn)

        accuracy_A_during_taskB_during_pnn_val, _, _, _ = evaluate_pnn(model_pnn, val_A_loader, criterion, task_num=0)
        val_A_during_B_pnn_run.append(accuracy_A_during_taskB_during_pnn_val)

    epoch_stats_other["pnn"]["task_A_during_B"].append(task_A_during_B_pnn_run)
    epoch_stats_other["pnn"]["acc_B"].append(task_B_pnn_run)

    # Store validation metrics for each run
    epoch_stats_other["pnn"]["val_task_A_during_B"].append(val_A_during_B_pnn_run)
    epoch_stats_other["pnn"]["val_acc_B"].append(val_B_pnn_run)
    
    print("Evaluating on Task B with PNN")
    accuracy_taskB_pnn, precision_taskB_pnn, recall_taskB_pnn, kappa_taskB_pnn = evaluate_pnn(model_pnn, test_B_loader, criterion, task_num=1)
    print("Evaluating on Task B with PNN - Validation")
    accuracy_taskB_pnn, precision_taskB_pnn, recall_taskB_pnn, kappa_taskB_pnn = evaluate_pnn(model_pnn, val_B_loader, criterion, task_num=1)
    
    
    # Evaluate on Task A after training on Task B
    print("Evaluating on Task A after training on Task B with PNN")
    accuracy_taskA_after_B_pnn, precision_taskA_after_B_pnn, recall_taskA_after_B_pnn, kappa_taskA_after_B_pnn = evaluate_pnn(model_pnn, test_A_loader, criterion, task_num=0)
    # Evaluate on Task A after training on Task B
    print("Evaluating on Task A after training on Task B with PNN - Validation")
    accuracy_taskA_after_B_pnn, precision_taskA_after_B_pnn, recall_taskA_after_B_pnn, kappa_taskA_after_B_pnn = evaluate_pnn(model_pnn, val_A_loader, criterion, task_num=0)

    with open('epoch_stats_other_PNN_TIL_bench_final.json', 'w') as f:
        json.dump(epoch_stats_other, f, indent=4)
    


In [267]:
import numpy as np

# Hyperparameters
learning_rate = 0.00001

# Initialize storage for results, including "Untrained_A"
results = {
    "untrained_A": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "initial_A": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "B_ewc": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "A_after_B_ewc": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "B_no_ewc": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "A_after_B_no_ewc": {"accuracy": [], "precision": [], "recall": [], "kappa": []}
}

results_val = {
    "initial_A_val": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "B_ewc_val": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "A_after_B_ewc_val": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "B_no_ewc_val": {"accuracy": [], "precision": [], "recall": [], "kappa": []},
    "A_after_B_no_ewc_val": {"accuracy": [], "precision": [], "recall": [], "kappa": []}
}

# Store per-epoch values for plotting
epoch_stats = {
    "Initial": {"accuracy": [], "loss": []},
    "ewc": {"accuracy": [], "loss": [], "task_A_during_B":[], "val_task_A_during_B": [], "val_acc": [], "val_loss": [],"val_task_B":[],"task_B":[],"B_during_A":[],"B_during_A_val":[],"A_during_A":[],"A_during_A_val":[]},
    "no_ewc": {"accuracy": [], "loss": [], "task_A_during_B": [], "val_task_A_during_B": [], "val_acc": [], "val_loss": [],"task_B":[],"val_task_B":[]}
}

epochs = 50
num_runs = 10

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")

    # Initialize the model for Task A
    modelA = ConvNet(shared_dim=128, output_dim_A=10, output_dim_B=2)
    optimizerA = optim.Adam(modelA.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Evaluate "Untrained_A" - performance on Task A before any training
    accuracy_untrained_A, precision_untrained_A, recall_untrained_A, kappa_untrained_A = evaluate(modelA, test_A_loader, criterion)
    print(f"Task A - Untrained: Accuracy: {accuracy_untrained_A}, Precision: {precision_untrained_A}, Recall: {recall_untrained_A}, Kappa: {kappa_untrained_A}")
    results["untrained_A"]["accuracy"].append(accuracy_untrained_A)
    results["untrained_A"]["precision"].append(precision_untrained_A)
    results["untrained_A"]["recall"].append(recall_untrained_A)
    results["untrained_A"]["kappa"].append(kappa_untrained_A)

    task_A_ewc_run = []
    val_task_A_ewc_run = []

    task_B_during_A_ewc_run = []
    val_task_B_during_A_ewc_run = []

    
    
    # # Train model on Task A (digit classification)
    for epoch in range(25):
        acc_A, loss_A = train_with_patience(modelA, train_A_loader, optimizerA, criterion, epochs = 1, patience=100)
        
        # Track performance on Task B
        accuracy_taskA, _, _, _ = evaluate(modelA, test_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A performance during Task A (with EWC): {accuracy_taskA:.4f}")
        task_A_ewc_run.append(accuracy_taskA)
        
        # Track performance on Task B
        val_accuracy_taskA, _, _, _ = evaluate(modelA, val_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A performance during Task A - validation set (with EWC): {val_accuracy_taskA:.4f}")
        val_task_A_ewc_run.append(val_accuracy_taskA)
        
        # Track performance on Task A while training on Task B
        accuracy_taskB_during_A_ewc, _, _, _ = evaluate(modelA, test_B_loader, criterion)
        print(f"Epoch {epoch + 1} - Task B performance during Task A (with EWC): {accuracy_taskB_during_A_ewc:.4f}")
        task_B_during_A_ewc_run.append(accuracy_taskB_during_A_ewc)
        
        # Track performance on Task A while training on Task B
        val_accuracy_taskB_during_A_ewc, _, _, _ = evaluate(modelA, val_B_loader, criterion)
        print(f"Epoch {epoch + 1} - Task B performance during Task A - validation set (with EWC): {val_accuracy_taskB_during_A_ewc:.4f}")
        val_task_B_during_A_ewc_run.append(val_accuracy_taskB_during_A_ewc)

    epoch_stats["ewc"]["B_during_A"].append(task_B_during_A_ewc_run)
    epoch_stats["ewc"]["B_during_A_val"].append(val_task_B_during_A_ewc_run)
    epoch_stats["ewc"]["A_during_A"].append(task_A_ewc_run)
    epoch_stats["ewc"]["A_during_A_val"].append(val_task_A_ewc_run)

     # Save model weights after Task A training
    task_a_weights.append(modelA.state_dict())
    #modelA.load_state_dict(weights)  # Directly load the state dict

    # Store epoch-wise results for plotting
    #epoch_stats["Initial"]["accuracy"].append(acc_A)
    #epoch_stats["Initial"]["loss"].append(loss_A)
    
    # Evaluate initial performance on Task A
    accuracy_taskA_initial, precision_taskA_initial, recall_taskA_initial, kappa_taskA_initial = evaluate(modelA, test_A_loader, criterion)
    print(f"Task A - Initial: Accuracy: {accuracy_taskA_initial}, Precision: {precision_taskA_initial}, Recall: {recall_taskA_initial}, Cohen's Kappa: {kappa_taskA_initial}")
    results["initial_A"]["accuracy"].append(accuracy_taskA_initial)
    results["initial_A"]["precision"].append(precision_taskA_initial)
    results["initial_A"]["recall"].append(recall_taskA_initial)
    results["initial_A"]["kappa"].append(kappa_taskA_initial)

    # Evaluate initial performance on Task A - validation set
    accuracy_taskA_initial_val, precision_taskA_initial_val, recall_taskA_initial_val, kappa_taskA_initial_val = evaluate(modelA, val_A_loader, criterion)
    print(f"Task A - Initial: Accuracy: {accuracy_taskA_initial_val:.4f}, Precision: {precision_taskA_initial_val:.4f}, Recall: {recall_taskA_initial_val:.4f}, Cohen's Kappa: {kappa_taskA_initial_val:.4f}")
    results_val["initial_A_val"]["accuracy"].append(accuracy_taskA_initial_val)
    results_val["initial_A_val"]["precision"].append(precision_taskA_initial_val)
    results_val["initial_A_val"]["recall"].append(recall_taskA_initial_val)
    results_val["initial_A_val"]["kappa"].append(kappa_taskA_initial_val)

    # Initialize EWC
    ewc = EWC(modelA, train_A_loader, importance=9999999999999999)

    # Store task_A_during_B as a list for this run
    task_A_during_B_ewc_run = []
    task_A_during_B_no_ewc_run = []

    task_B_ewc_run = []
    task_B_no_ewc_run = []

    val_task_B_ewc_run = []
    val_task_B_no_ewc_run = []

    val_task_A_during_B_ewc_run = []
    val_task_A_during_B_no_ewc_run = []

    
    for epoch in range(epochs):
        # Train model on Task B with EWC
        acc_B_ewc, loss_B_ewc = train_with_ewc_with_patience(modelA, train_B_loader, ewc, optimizerA, criterion, epochs=1, patience=20)
    
        # Store epoch-wise results for Task B
        #epoch_stats["ewc"]["accuracy"].append(acc_B_ewc)
        #epoch_stats["ewc"]["loss"].append(loss_B_ewc)

        # Track performance on Task B
        accuracy_taskB, _, _, _ = evaluate(modelA, test_B_loader, criterion)
        print(f"Epoch {epoch + 1} - Task B performance during Task B (with EWC): {accuracy_taskB:.4f}")
        task_B_ewc_run.append(accuracy_taskB)

        # Track performance on Task B
        val_accuracy_taskB, _, _, _ = evaluate(modelA, val_B_loader, criterion)
        print(f"Epoch {epoch + 1} - Task B performance during Task B - validation set (with EWC): {val_accuracy_taskB:.4f}")
        val_task_B_ewc_run.append(val_accuracy_taskB)
    
        # Track performance on Task A while training on Task B
        accuracy_taskA_during_B_ewc, _, _, _ = evaluate(modelA, test_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A performance during Task B (with EWC): {accuracy_taskA_during_B_ewc:.4f}")
        task_A_during_B_ewc_run.append(accuracy_taskA_during_B_ewc)

        # Track performance on Task A while training on Task B
        val_accuracy_taskA_during_B_ewc, _, _, _ = evaluate(modelA, val_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A performance during Task B - validation set (with EWC): {val_accuracy_taskA_during_B_ewc:.4f}")
        val_task_A_during_B_ewc_run.append(val_accuracy_taskA_during_B_ewc)
        
    epoch_stats["ewc"]["task_A_during_B"].append(task_A_during_B_ewc_run)
    epoch_stats["ewc"]["val_task_A_during_B"].append(val_task_A_during_B_ewc_run)
    epoch_stats["ewc"]["task_B"].append(task_B_ewc_run)
    epoch_stats["ewc"]["val_task_B"].append(val_task_B_ewc_run)

    # Evaluate performance on Task B with EWC
    accuracy_taskB_ewc, precision_taskB_ewc, recall_taskB_ewc, kappa_taskB_ewc = evaluate(modelA, test_B_loader, criterion)
    print(f"Task B with EWC: Accuracy: {accuracy_taskB_ewc}, Precision: {precision_taskB_ewc}, Recall: {recall_taskB_ewc}, Cohen's Kappa: {kappa_taskB_ewc}")
    results["B_ewc"]["accuracy"].append(accuracy_taskB_ewc)
    results["B_ewc"]["precision"].append(precision_taskB_ewc)
    results["B_ewc"]["recall"].append(recall_taskB_ewc)
    results["B_ewc"]["kappa"].append(kappa_taskB_ewc)

    # Evaluate on Task B - Validation
    accuracy_taskB_ewc_val, precision_taskB_ewc_val, recall_taskB_ewc_val, kappa_taskB_ewc_val = evaluate(modelA, val_B_loader, criterion)
    print(f"Task A after Task B with EWC: Accuracy: {accuracy_taskB_ewc_val:.4f}, Precision: {precision_taskB_ewc_val:.4f}, Recall: {recall_taskB_ewc_val:.4f}, Kappa: {kappa_taskB_ewc_val:.4f}")
    results_val["B_ewc_val"]["accuracy"].append(accuracy_taskB_ewc_val)
    results_val["B_ewc_val"]["precision"].append(precision_taskB_ewc_val)
    results_val["B_ewc_val"]["recall"].append(recall_taskB_ewc_val)
    results_val["B_ewc_val"]["kappa"].append(kappa_taskB_ewc_val)

    # Evaluate performance on Task A after training on Task B with EWC
    accuracy_taskA_final_ewc, precision_taskA_final_ewc, recall_taskA_final_ewc, kappa_taskA_final_ewc = evaluate(modelA, test_A_loader, criterion)
    print(f"Task A - Final with EWC: Accuracy: {accuracy_taskA_final_ewc}, Precision: {precision_taskA_final_ewc}, Recall: {recall_taskA_final_ewc}, Cohen's Kappa: {kappa_taskA_final_ewc}")
    results["A_after_B_ewc"]["accuracy"].append(accuracy_taskA_final_ewc)
    results["A_after_B_ewc"]["precision"].append(precision_taskA_final_ewc)
    results["A_after_B_ewc"]["recall"].append(recall_taskA_final_ewc)
    results["A_after_B_ewc"]["kappa"].append(kappa_taskA_final_ewc)

    # Evaluate performance on Task A after training on Task B with EWC - Validation
    accuracy_taskA_after_B_ewc_val, precision_taskA_after_B_ewc_val, recall_taskA_after_B_ewc_val, kappa_taskA_after_B_ewc_val = evaluate(modelA, val_A_loader, criterion)
    print(f"Task A after Task B with EWC: Accuracy: {accuracy_taskA_after_B_ewc_val:.4f}, Precision: {precision_taskA_after_B_ewc_val:.4f}, Recall: {recall_taskA_after_B_ewc_val:.4f}, Cohen's Kappa: {kappa_taskA_after_B_ewc_val:.4f}")
    results_val["A_after_B_ewc_val"]["accuracy"].append(accuracy_taskA_after_B_ewc_val)
    results_val["A_after_B_ewc_val"]["precision"].append(precision_taskA_after_B_ewc_val)
    results_val["A_after_B_ewc_val"]["recall"].append(recall_taskA_after_B_ewc_val)
    results_val["A_after_B_ewc_val"]["kappa"].append(kappa_taskA_after_B_ewc_val)

    with open('epoch_stats_TIL_Bench_smaller_sample_final_test.json', 'w') as f:
        json.dump(epoch_stats, f, indent=4)
    
        #torch.save(task_a_weights, 'task_a_weights_til_bench.pth')


Run 1/10
Task A - Untrained: Accuracy: 0.10333333333333333, Precision: 0.02144229650824668, Recall: 0.10333333333333333, Kappa: 0.0037037037037036535
Epoch 1/1, Loss: 1.9479
Epoch 1 - Task A performance during Task A (with EWC): 0.7593
Epoch 1 - Task A performance during Task A - validation set (with EWC): 0.7263
Epoch 1 - Task B performance during Task A (with EWC): 0.3460
Epoch 1 - Task B performance during Task A - validation set (with EWC): 0.3353
Epoch 1/1, Loss: 1.0519
Epoch 2 - Task A performance during Task A (with EWC): 0.8593
Epoch 2 - Task A performance during Task A - validation set (with EWC): 0.8427
Epoch 2 - Task B performance during Task A (with EWC): 0.4073
Epoch 2 - Task B performance during Task A - validation set (with EWC): 0.3977
Epoch 1/1, Loss: 0.6222
Epoch 3 - Task A performance during Task A (with EWC): 0.8807
Epoch 3 - Task A performance during Task A - validation set (with EWC): 0.8660
Epoch 3 - Task B performance during Task A (with EWC): 0.4060
Epoch 3 - T

In [335]:
# Reuse Task A weights for initializing Task B models
for i, weights in enumerate(task_a_weights):

    epochs = 50
    num_runs = 1
    for run in range(num_runs):
        print(f"Run {run + 1}/{num_runs}")
        # Initialize the model for Task B without EWC
        modelB = ConvNet(shared_dim=128, output_dim_A=10, output_dim_B=2)
        modelB.load_state_dict(weights)  # Directly load the state dict
    
    
        optimizerB = optim.Adam(modelB.parameters(), lr=learning_rate)
        criterionB = nn.CrossEntropyLoss()

        # Track performance on Task A while training on Task B
        accuracy_taskA_during_B_no_ewc, _, _, _ = evaluate(modelB, test_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A performance during Task B (without EWC): {accuracy_taskA_during_B_no_ewc:.4f}")
    
        #acc_A, loss_A = train_with_patience(modelB, train_A_loader, optimizerB, criterionB, epochs=1, patience=20)
    
        for epoch in range(epochs):
            # Train model on Task B without EWC
            acc_B_no_ewc, loss_B_no_ewc = train_with_patience(modelB, train_B_loader, optimizerB, criterion, epochs=1, patience=100)
        
            # Store epoch-wise results for Task B
            #epoch_stats["no_ewc"]["accuracy"].append(acc_B_no_ewc)
            #epoch_stats["no_ewc"]["loss"].append(loss_B_no_ewc)
    
            # Track performance on Task B
            accuracy_taskB_no_ewc, _, _, _ = evaluate(modelB, test_B_loader, criterion)
            print(f"Epoch {epoch + 1} - Task B performance during Task B (without EWC): {accuracy_taskB_no_ewc:.4f}")
            task_B_no_ewc_run.append(accuracy_taskB_no_ewc)
    
            # Track performance on Task B
            val_accuracy_taskB_no_ewc, _, _, _ = evaluate(modelB, val_B_loader, criterion)
            print(f"Epoch {epoch + 1} - Task B performance during Task B - validation set (without EWC): {val_accuracy_taskB_no_ewc:.4f}")
            val_task_B_no_ewc_run.append(val_accuracy_taskB_no_ewc)
        
            # Track performance on Task A while training on Task B
            accuracy_taskA_during_B_no_ewc, _, _, _ = evaluate(modelB, test_A_loader, criterion)
            print(f"Epoch {epoch + 1} - Task A performance during Task B (without EWC): {accuracy_taskA_during_B_no_ewc:.4f}")
            task_A_during_B_no_ewc_run.append(accuracy_taskA_during_B_no_ewc)
    
            # Track performance on Task A while training on Task B
            val_accuracy_taskA_during_B_no_ewc, _, _, _ = evaluate(modelB, val_A_loader, criterion)
            print(f"Epoch {epoch + 1} - Task A performance during Task B - validation set (without EWC): {val_accuracy_taskA_during_B_no_ewc:.4f}")
            val_task_A_during_B_no_ewc_run.append(val_accuracy_taskA_during_B_no_ewc)
            
        epoch_stats["no_ewc"]["task_A_during_B"].append(task_A_during_B_no_ewc_run)
        epoch_stats["no_ewc"]["val_task_A_during_B"].append(val_task_A_during_B_no_ewc_run)
        epoch_stats["no_ewc"]["task_B"].append(task_B_no_ewc_run)
        epoch_stats["no_ewc"]["val_task_B"].append(val_task_B_no_ewc_run)
    
        # Evaluate performance on Task B without EWC
        accuracy_taskB_no_ewc, precision_taskB_no_ewc, recall_taskB_no_ewc, kappa_taskB_no_ewc = evaluate(modelB, test_B_loader, criterion)
        print(f"Task B without EWC: Accuracy: {accuracy_taskB_no_ewc}, Precision: {precision_taskB_no_ewc}, Recall: {recall_taskB_no_ewc}, Cohen's Kappa: {kappa_taskB_no_ewc}")
        results["B_no_ewc"]["accuracy"].append(accuracy_taskB_no_ewc)
        results["B_no_ewc"]["precision"].append(precision_taskB_no_ewc)
        results["B_no_ewc"]["recall"].append(recall_taskB_no_ewc)
        results["B_no_ewc"]["kappa"].append(kappa_taskB_no_ewc)
    
        # Evaluate on Task B - validation
        accuracy_taskB_no_ewc_val, precision_taskB_no_ewc_val, recall_taskB_no_ewc_val, kappa_taskB_no_ewc_val = evaluate(modelB, val_B_loader, criterion)
        print(f"Task A after Task B without EWC: Accuracy: {accuracy_taskB_no_ewc_val:.4f}, Precision: {precision_taskB_no_ewc_val:.4f}, Recall: {recall_taskB_no_ewc_val:.4f}, Kappa: {kappa_taskB_no_ewc_val:.4f}")
        results_val["B_no_ewc_val"]["accuracy"].append(accuracy_taskB_no_ewc_val)
        results_val["B_no_ewc_val"]["precision"].append(precision_taskB_no_ewc_val)
        results_val["B_no_ewc_val"]["recall"].append(recall_taskB_no_ewc_val)
        results_val["B_no_ewc_val"]["kappa"].append(kappa_taskB_no_ewc_val)
    
        # Evaluate performance on Task A after training on Task B without EWC
        accuracy_taskA_final_no_ewc, precision_taskA_final_no_ewc, recall_taskA_final_no_ewc, kappa_taskA_final_no_ewc = evaluate(modelB, test_A_loader, criterion)
        print(f"Task A - Final without EWC: Accuracy: {accuracy_taskA_final_no_ewc}, Precision: {precision_taskA_final_no_ewc}, Recall: {recall_taskA_final_no_ewc}, Cohen's Kappa: {kappa_taskA_final_no_ewc}")
        results["A_after_B_no_ewc"]["accuracy"].append(accuracy_taskA_final_no_ewc)
        results["A_after_B_no_ewc"]["precision"].append(precision_taskA_final_no_ewc)
        results["A_after_B_no_ewc"]["recall"].append(recall_taskA_final_no_ewc)
        results["A_after_B_no_ewc"]["kappa"].append(kappa_taskA_final_no_ewc)
    
        # Evaluate performance on Task A after training on Task B without EWC - validation
        accuracy_taskA_after_B_no_ewc_val, precision_taskA_after_B_no_ewc_val, recall_taskA_after_B_no_ewc_val, kappa_taskA_after_B_no_ewc_val = evaluate(modelB, val_A_loader, criterion)
        print(f"Task A after Task B without EWC: Accuracy: {accuracy_taskA_after_B_no_ewc_val:.4f}, Precision: {precision_taskA_after_B_no_ewc_val:.4f}, Recall: {recall_taskA_after_B_no_ewc_val:.4f}, Cohen's Kappa: {kappa_taskA_after_B_no_ewc_val:.4f}")
        results_val["A_after_B_no_ewc_val"]["accuracy"].append(accuracy_taskA_after_B_no_ewc_val)
        results_val["A_after_B_no_ewc_val"]["precision"].append(precision_taskA_after_B_no_ewc_val)
        results_val["A_after_B_no_ewc_val"]["recall"].append(recall_taskA_after_B_no_ewc_val)
        results_val["A_after_B_no_ewc_val"]["kappa"].append(kappa_taskA_after_B_no_ewc_val)
    
        with open('epoch_stats__no_ewc_TIL_Bench_smaller_sample_2.json', 'w') as f:
            json.dump(epoch_stats, f, indent=4)
        
# # Calculate statistics: mean, variance, min, and max
# statistics_results = {
#     key: {
#         metric: {
#             "mean": np.mean(values),
#             "variance": np.var(values),
#             "min": np.min(values),
#             "max": np.max(values)
#         }
#         for metric, values in metrics.items()
#     }
#     for key, metrics in results.items()
# }

# # Print results with statistics
# print("\nResults with Statistics:")
# for key, metrics in statistics_results.items():
#     print(f"{key}:")
#     for metric, stats in metrics.items():
#         print(f"  {metric} - Mean: {stats['mean']:.4f}, Variance: {stats['variance']:.4f}, Min: {stats['min']:.4f}, Max: {stats['max']:.4f}")


Run 1/1
Epoch 50 - Task A performance during Task B (without EWC): 0.9233
Epoch 1/1, Loss: 0.2356
Epoch 1 - Task B performance during Task B (without EWC): 0.9280
Epoch 1 - Task B performance during Task B - validation set (without EWC): 0.9433
Epoch 1 - Task A performance during Task B (without EWC): 0.8673
Epoch 1 - Task A performance during Task B - validation set (without EWC): 0.8520
Epoch 1/1, Loss: 0.1826
Epoch 2 - Task B performance during Task B (without EWC): 0.9460
Epoch 2 - Task B performance during Task B - validation set (without EWC): 0.9580
Epoch 2 - Task A performance during Task B (without EWC): 0.8160
Epoch 2 - Task A performance during Task B - validation set (without EWC): 0.8080
Epoch 1/1, Loss: 0.1553
Epoch 3 - Task B performance during Task B (without EWC): 0.9533
Epoch 3 - Task B performance during Task B - validation set (without EWC): 0.9600
Epoch 3 - Task A performance during Task B (without EWC): 0.7973
Epoch 3 - Task A performance during Task B - validatio

In [None]:
import json
# Initialize storage for results, including "Untrained_A"


# Store per-epoch values for plotting
epoch_stats_A = {
    "Initial": {"accuracy": [], "loss": []}
}

epoch_stats_B = {
    "Initial": {"accuracy": [], "loss": []}
}


for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    model1_A = ConvNet(shared_dim=128, output_dim_A=10, output_dim_B=2)
    optimizer = optim.Adam(model1_A.parameters(), lr=learning_rate))
    criterion = nn.CrossEntropyLoss()


    # Store task_A_during_B as a list for this run
    task_A_run = []
    task_B_run = []


    for epoch in range(epochs):

        # Train and evaluate on Blue domain with EWC
        print("Training on Blue domain with EWC")
        ewc = EWC(model1_A, train_A_loader, importance=900000000)
        acc_A_ewc, loss_a_ewc = train_with_patience(model1_A, train_A_loader, optimizer, criterion, epochs=1,patience=50)

        # Track performance on Task B
        accuracy_taskA, _, _, _ = evaluate(model1_A, val_A_loader, criterion)
        print(f"Epoch {epoch + 1} - Task A (with EWC): {accuracy_taskA:.4f}")
        task_A_run.append(accuracy_taskA)

        # Track performance on Task B
        accuracy_taskB, _, _, _ = evaluate(model1_A, val_loader_B, criterion)
        print(f"Epoch {epoch + 1} - Task B (with EWC): {accuracy_taskB:.4f}")
        task_B_run.append(accuracy_taskB)
       
    epoch_stats_A["Initial"]["accuracy"].append(task_A_run)
    epoch_stats_B["Initial"]["accuracy"].append(task_B_run)
    
    with open('epoch_stats_B_til_bench_val.json', 'w') as f:
        json.dump(epoch_stats_B, f, indent=4)
    

In [84]:
accuracy_taskA_final_rehearsal

0.9923