# Experiment 3

## Overview

This notebook implements Experiment 3 to further evaluate our classic and novel continual learning approaches on variants of the MNIST dataset.

The key components are:

* **Classic Approach**:
    * Implements classic approach training by looping through tasks, training the model and evaluating performance.
    * Saves the trained models.

* **Novel Approach**:
    * Implements novel training using our proposed method.
    * Loads pretrained classic models to initialise batch normalisation means.
    * Calculates task similarities and soft parameter sharing alphas.
    * Trains using modified parameter sharing.

* **Post-Training Analysis**:
    * Generates confusion matrices and calculates performance metrics for each task.
    * Visualises the average confusion matrix and metrics to compare the classic and novel approaches.

The notebook demonstrates our comprehensive experiment workflow - implementing baselines, proposing a novel method, training models, evaluating performance and comparing results.

## Importing Required Libraries

In [None]:
import os
import torch
import pandas as pd
import seaborn as sns
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

from utilities.eval_pred_funcs import predict
from utilities.train_funcs import train, trainV2
from utilities.models import MultitaskFC, MultitaskFCV2
from utilities.data import MNISTPerm, PartitionMNIST, RotatingMNIST
from utilities.utils import cache_masks, set_model_task, set_num_tasks_learned
from utilities.similarity_funcs import calculate_task_similarityE2, determine_alphas

## Classic Approach

In [None]:
# Define the function for classic training over specified epochs and tasks, 
# with the hidden_size set as 300
def training_classic_pd(epochs, num_tasks, hidden_size=300):
    # Load the permuted MNIST dataset
    mnist = MNISTPerm()

    # Initialise the MultitaskFC model for the tasks
    model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
    

    # Initialise a results dictionary for the permuted MNIST dataset
    perm_results = {}
    
    # Dictionary to store the btach normalisation means for each task
    bn_means_dict = {}

    # Loop over each task for training and prediction
    for task_id in range(num_tasks):
        # Log the training task number
        print(f"Training for task {task_id}")

        # Create a dictionary for each task's results
        perm_results[task_id] = {}

        # Set the current task in the model
        set_model_task(model, task_id)

        # Update the task in the dataset
        mnist.update_task(task_id)

        # Initialise the optimiser (RMSprop) for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )

        # Loop over each epoch to train the model
        for e in range(epochs):
            # Use the training function for this model
            bn_means = train(model, mnist.train_loader, optimizer, e, task_id)
            
            # Store the batch normalisation means in the dictionary for the task
            bn_means_dict[task_id] = bn_means

            # Display validation information
            print("Validation")
            print("============")

            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                perm_predictions, perm_labels = predict(model, mnist.val_loader, e)

                perm_results[task_id]["perm_predictions"] = [
                    int(x) for x in perm_predictions
                ]

                perm_results[task_id]["perm_labels"] = [int(x) for x in perm_labels]

        # Cache the current state of the masks in the model
        cache_masks(model)
        print()

        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task_id + 1)
        print()
        
        # Save the model for the current task
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_permuted_model_task_{task_id}.pth')
        torch.save(model.state_dict(), file_path)


    # Save the results of each task to a CSV file
    for key, value in perm_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/perm_task_{key}_classic.csv")

        
        
        
        
    # Load the rotated MNIST dataset
    mnist = RotatingMNIST()

    # Initialise the MultitaskFC model for the tasks
    model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
    

    # Initialise a results dictionary for the rotated MNIST dataset
    rotate_results = {}
    
    # Dictionary to store the bacth normalisation means for each task
    bn_means_dict = {}

    # Loop over each task for training and prediction
    for task_id in range(num_tasks):
        # Log the training task number
        print(f"Training for task {task_id}")

        # Create a dictionary for each task's results
        rotate_results[task_id] = {}

        # Set the current task in the model
        set_model_task(model, task_id)

        # Update the task in the dataset
        mnist.update_task(task_id)

        # Initialise the optimiser (RMSprop) for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )

        # Loop over each epoch to train the model
        for e in range(epochs):
            # Use the training function for this model
            bn_means = train(model, mnist.train_loader, optimizer, e, task_id)
            
            # Store the batch normalisation means in the dictionary for the task
            bn_means_dict[task_id] = bn_means

            # Display validation information
            print("Validation")
            print("============")

            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                rotate_predictions, rotate_labels = predict(model, mnist.val_loader, e)

                rotate_results[task_id]["rotate_predictions"] = [
                    int(x) for x in rotate_predictions
                ]

                rotate_results[task_id]["rotate_labels"] = [int(x) for x in rotate_labels]

        # Cache the current state of the masks in the model
        cache_masks(model)
        print()

        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task_id + 1)
        print()
        
        # Save the model for the current task
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_rotated_model_task_{task_id}.pth')
        torch.save(model.state_dict(), file_path)


    # Save the results of each task to a CSV file
    for key, value in rotate_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/rotate_task_{key}_classic.csv")

        
        
        
        
    # Load the partitioned MNIST dataset V2
    mnist = PartitionMNIST()

    # Initialise the MultitaskFC model for the tasks
    model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
    

    # Initialise a results dictionary for the partitioned MNIST dataset
    part_results = {}
    
    # Dictionary to store the bacth normalisation means for each task
    bn_means_dict = {}

    # Loop over each task for training and prediction
    for task_id in range(num_tasks):
        # Log the training task number
        print(f"Training for task {task_id}")

        # Create a dictionary for each task's results
        part_results[task_id] = {}

        # Set the current task in the model
        set_model_task(model, task_id)

        # Update the task in the dataset
        mnist.update_task(task_id)

        # Initialise the optimiser (RMSprop) for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )

        # Loop over each epoch to train the model
        for e in range(epochs):
            # Use the training function for this model
            bn_means = train(model, mnist.train_loader, optimizer, e, task_id)
            
            # Store the batch normalisation means in the dictionary for the task
            bn_means_dict[task_id] = bn_means

            # Display validation information
            print("Validation")
            print("============")

            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                part_predictions, part_labels = predict(model, mnist.val_loader, e)

                part_results[task_id]["part_predictions"] = [
                    int(x) for x in part_predictions
                ]

                part_results[task_id]["part_labels"] = [int(x) for x in part_labels]

        # Cache the current state of the masks in the model
        cache_masks(model)
        print()

        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task_id + 1)
        print()
        
        # Save the model for the current task
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_partitioned_model_task_{task_id}.pth')
        torch.save(model.state_dict(), file_path)


    # Save the results of each task to a CSV file
    for key, value in part_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/part_task_{key}_classic.csv")

In [None]:
# Entry point: only runs when this code is executed as the main module
if __name__ == "__main__":
    # Classic approach: 2 epochs per task, 10 tasks per dataset variant
    training_classic_pd(epochs=2, num_tasks=10)

## Novel Approach

In [None]:
# Define the function for novel training over specified epochs and tasks, 
# with the hidden_size set as 300
def training_novel_pd(epochs, num_tasks, hidden_size=300):
    
    # Load the permuted MNIST dataset
    mnist = MNISTPerm()

    # Initialise the MultitaskFCV2 model for the tasks
    model = MultitaskFCV2(hidden_size=hidden_size, num_tasks=num_tasks)

    # Initialise a results dictionary for the permuted MNIST dataset
    perm_results = {}
        
        
    # Loop through all tasks to load the saved models and extract batch normalisation means values
    for task_id in range(num_tasks):
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_permuted_model_task_{task_id}.pth')
        classic_model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
        classic_model.load_state_dict(torch.load(file_path), strict=False)
        bn_mean = classic_model.get_bn_means(task_id)
        model.bn_means[task_id] = bn_mean
        print(f"Task {task_id}: bn_mean =", bn_mean)
        

    # Loop over each task for training and prediction
    for task in range(num_tasks):
        
        print(f"Current task: {task}")
        print(f"Updated bn_means (length: {len(model.bn_means)}): {model.bn_means}")
        
        # Calculate the similarities matrix based on the current state of batch normalisation means
        similarities_matrix = calculate_task_similarityE2(model.bn_means, num_tasks)

        # Calculate the alphas based on the task similarities
        alphas_per_layer = determine_alphas(similarities_matrix, task)

        # Set the alphas for the multitask masked linear layers
        model.set_alphas(alphas_per_layer)
        
        # Log the training task number
        print(f"Training for task {task}")
        
        # Create a dictionary for each task's results
        perm_results[task] = {}
        
        # Set the current task in the model
        set_model_task(model, task)

        # Update the task in the dataset
        mnist.update_task(task)

        # Initialise the optimiser (RMSprop) for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )
        
        # Loop over each epoch to train the model
        for e in range(epochs):
            # Use the training function for this model
            trainV2(model, mnist.train_loader, optimizer, e, bn_means=model.bn_means)

            # Display validation information
            print("Validation")
            print("============")

            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                perm_predictions, perm_labels = predict(model, mnist.val_loader, e)

                perm_results[task]["perm_predictions"] = [
                    int(x) for x in perm_predictions
                ]

                perm_results[task]["perm_labels"] = [int(x) for x in perm_labels]

        # Cache the current state of the masks in the model
        cache_masks(model)
        print()

        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task + 1)
        print()

    # Save the results of each task to a CSV file
    for key, value in perm_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/perm_task_{key}_novel.csv")



        
        
    # Load the rotated MNIST dataset
    mnist = RotatingMNIST()
    
    # Initialise the MultitaskFCV2 model for the tasks
    model = MultitaskFCV2(hidden_size=hidden_size, num_tasks=num_tasks)
    
    # Initialise a results dictionary for the rotated MNIST dataset
    rotate_results = {}
    
    
    # Loop through all tasks to load the saved models and extract batch normalisation means values
    for task_id in range(num_tasks):
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_rotated_model_task_{task_id}.pth')
        classic_model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
        classic_model.load_state_dict(torch.load(file_path), strict=False)
        bn_mean = classic_model.get_bn_means(task_id)
        model.bn_means[task_id] = bn_mean
        print(f"Task {task_id}: bn_mean =", bn_mean)
    
    # Loop over the range of tasks for training and prediction
    for task in range(num_tasks):
        
        print(f"Current task: {task}")
        print(f"Updated bn_means (length: {len(model.bn_means)}): {model.bn_means}")
        
        # Calculate the similarities matrix based on the current state of batch normalisation means
        similarities_matrix = calculate_task_similarityE2(model.bn_means, num_tasks)

        # Calculate the alphas based on the task similarities
        alphas = determine_alphas(similarities_matrix, task)

        # Set the alphas for the multitask masked linear layer
        model.set_alphas(alphas)
        
        # Log the training task number
        print(f"Training for task {task}")
        
        # Initialise a dictionary for each task's results
        rotate_results[task] = {}
                
        # Set the current task in the model
        set_model_task(model, task)
        
        # Update the task in the dataset
        mnist.update_task(task)
        
        # Initialise the optimiser for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )
        
        # Loop over each epoch to train the model
        for e in range(epochs):
            # Use the training function for this model
            trainV2(model, mnist.train_loader, optimizer, e, bn_means=model.bn_means)
            
            # Display validation information
            print("Validation")
            print("============")
            
            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                rotate_predictions, rotate_labels = predict(model, mnist.val_loader, e)

                rotate_results[task]["rotate_predictions"] = [
                    int(x) for x in rotate_predictions
                ]

                rotate_results[task]["rotate_labels"] = [int(x) for x in rotate_labels]
        
        # Cache the mask states of the model
        cache_masks(model)
        print()
        
        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task + 1)
        print()
    
    # Save the results of each task to a CSV file
    for key, value in rotate_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/rotate_task_{key}_novel.csv")

        
        


    # Load the partitioned MNIST dataset
    mnist = PartitionMNIST()
    
    # Initialise the MultitaskFCV2 model for the tasks
    model = MultitaskFCV2(hidden_size=hidden_size, num_tasks=num_tasks)
    
    # Initialise the results dictionary for the partitioned MNIST dataset
    part_results = {}
    
    
    # Loop through all tasks to load the saved models and extract the batch normaloisation means values
    for task_id in range(num_tasks):
        current_directory = os.getcwd()
        models_directory = os.path.join(current_directory, 'models')
        prediction_directory = os.path.join(models_directory, 'prediction')
        file_path = os.path.join(prediction_directory, f'pd_partitioned_model_task_{task_id}.pth')
        classic_model = MultitaskFC(hidden_size=hidden_size, num_tasks=num_tasks)
        classic_model.load_state_dict(torch.load(file_path), strict=False)
        bn_mean = classic_model.get_bn_means(task_id)
        model.bn_means[task_id] = bn_mean
        print(f"Task {task_id}: bn_mean =", bn_mean)
    
    # Loop over the range of tasks for training and prediction
    for task in range(num_tasks):
        
        print(f"Current task: {task}")
        print(f"Updated bn_means (length: {len(model.bn_means)}): {model.bn_means}")
        
        # Calculate the similarities matrix based on the current state of batch normalisation means
        similarities_matrix = calculate_task_similarityE2(model.bn_means, num_tasks)

        # Calculate the alphas based on the task similarities
        alphas = determine_alphas(similarities_matrix, task)

        # Set the alphas for the multitask masked linear layer
        model.set_alphas(alphas)
        
        # Initialise a dictionary for each task's results
        part_results[task] = {}
        
        # Log the training task number
        print(f"Training for task {task}")
                
        # Set the current task in the model
        set_model_task(model, task)
        
        # Update the task in the dataset
        mnist.update_task(task)
        
        # Initialise the optimiser (RMSprop) for model parameters that require gradient computation
        optimizer = optim.RMSprop(
            [p for p in model.parameters() if p.requires_grad], lr=1e-4
        )
        
        # Loop over each epoch to train the model
        for e in range(epochs):
            trainV2(model, mnist.train_loader, optimizer, e, bn_means=model.bn_means)
            
            # Display validation information
            print("Validation")
            print("============")
            
            # At the end of the last epoch, predict and save the predictions and labels for the task
            if e == epochs - 1:
                part_predictions, part_labels = predict(model, mnist.val_loader, e)

                part_results[task]["part_predictions"] = [
                    int(x) for x in part_predictions
                ]

                part_results[task]["part_labels"] = [int(x) for x in part_labels]
        
        # Cache the mask states of the model
        cache_masks(model)
        print()
        
        # Update the number of learned tasks in the model
        set_num_tasks_learned(model, task + 1)
        print()
    
    # Save the results of each task to a CSV file
    for key, value in part_results.items():
        df = pd.DataFrame(value)
        df.to_csv(f"outputs/prediction/part_task_{key}_novel.csv")

In [None]:
# Entry point: only runs when this code is executed as the main module
if __name__ == "__main__":
    # Novel approach: 2 epochs per task, 10 tasks per dataset variant
    training_novel_pd(epochs=2, num_tasks=10)

## Post-Training Results Visualisation

### Permuted MNIST

#### Classic Approach

In [None]:
# Names (without extension) of the per-task prediction CSVs, tasks 0 to 9
file_names = [f'perm_task_{task}_classic' for task in range(10)]

# Per-task performance metrics; every list gets one entry per CSV file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Set seaborn default theme
sns.set_theme()

# Set seaborn plot context
sns.set_context("paper")

# Running sum of the confusion matrices of all tasks
sum_cm = None

# Define the folder path for the CSV files
folder_path = 'outputs/prediction/'

# Iterate over each file
for file_name in file_names:
    # Load the data from the CSV file
    data = pd.read_csv(folder_path + file_name + '.csv')
    y_true = data['perm_labels']
    y_pred = data['perm_predictions']

    # Metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of the actual vs. predicted labels of this task
    cm = confusion_matrix(y_true, y_pred)

    # Add the confusion matrix to the running sum
    sum_cm = cm if sum_cm is None else sum_cm + cm

# Average the sum of confusion matrices
average_cm = sum_cm / len(file_names)

# Create a DataFrame from the average confusion matrix
average_cm_df = pd.DataFrame(average_cm)

# Create a new matplotlib figure
fig = plt.figure(figsize=(7,5))

# Create a heatmap of the average confusion matrix using seaborn
sns.heatmap(average_cm_df, annot=True, fmt='g')

# Add a title to the plot
plt.title('Average Confusion Matrix Across All Tasks - MNIST Permuted (Classic)')

# Adjust the layout of the plot
fig.tight_layout()

# savefig does not create missing directories, so make sure 'figures' exists
os.makedirs('figures', exist_ok=True)

# Save the plot to the figures directory
plt.savefig('figures/perm_average_confusion_matrix_classic.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of the (already rounded) per-task metrics, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the average metrics as a table
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

#### Novel Approach

In [None]:
# Names (without extension) of the per-task prediction CSVs, tasks 0 to 9
file_names = [f'perm_task_{task}_novel' for task in range(10)]

# Per-task performance metrics; every list gets one entry per CSV file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Set seaborn default theme
sns.set_theme()

# Set seaborn plot context
sns.set_context("paper")

# Running sum of the confusion matrices of all tasks
sum_cm = None

# Define the folder path for the CSV files
folder_path = 'outputs/prediction/'

# Iterate over each file
for file_name in file_names:
    # Load the data from the CSV file
    data = pd.read_csv(folder_path + file_name + '.csv')
    y_true = data['perm_labels']
    y_pred = data['perm_predictions']

    # Metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of the actual vs. predicted labels of this task
    cm = confusion_matrix(y_true, y_pred)

    # Add the confusion matrix to the running sum
    sum_cm = cm if sum_cm is None else sum_cm + cm

# Average the sum of confusion matrices
average_cm = sum_cm / len(file_names)

# Create a DataFrame from the average confusion matrix
average_cm_df = pd.DataFrame(average_cm)

# Create a new matplotlib figure
fig = plt.figure(figsize=(7,5))

# Create a heatmap of the average confusion matrix using seaborn
sns.heatmap(average_cm_df, annot=True, fmt='g')

# Add a title to the plot
plt.title('Average Confusion Matrix Across All Tasks - MNIST Permuted (Novel)')

# Adjust the layout of the plot
fig.tight_layout()

# savefig does not create missing directories, so make sure 'figures' exists
os.makedirs('figures', exist_ok=True)

# Save the plot to the figures directory
plt.savefig('figures/perm_average_confusion_matrix_novel.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of the (already rounded) per-task metrics, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the average metrics as a table, without the index column so the
# output has the same layout as the per-task table printed just above
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

### Rotated MNIST

#### Classic Approach

In [None]:
# Names (without extension) of the per-task prediction CSVs, tasks 0 to 9
file_names = [f'rotate_task_{task}_classic' for task in range(10)]

# Per-task performance metrics; every list gets one entry per CSV file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Set seaborn default theme
sns.set_theme()

# Set seaborn plot context
sns.set_context("paper")

# Running sum of the confusion matrices of all tasks
sum_cm = None

# Define the folder path for the CSV files
folder_path = 'outputs/prediction/'

# Iterate over each file
for file_name in file_names:
    # Load the data from the CSV file
    data = pd.read_csv(folder_path + file_name + '.csv')
    y_true = data['rotate_labels']
    y_pred = data['rotate_predictions']

    # Metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of the actual vs. predicted labels of this task
    cm = confusion_matrix(y_true, y_pred)

    # Add the confusion matrix to the running sum
    sum_cm = cm if sum_cm is None else sum_cm + cm

# Average the sum of confusion matrices
average_cm = sum_cm / len(file_names)

# Create a DataFrame from the average confusion matrix
average_cm_df = pd.DataFrame(average_cm)

# Create a new matplotlib figure
fig = plt.figure(figsize=(7,5))

# Create a heatmap of the average confusion matrix using seaborn
sns.heatmap(average_cm_df, annot=True, fmt='g')

# Add a title to the plot
plt.title('Average Confusion Matrix Across All Tasks - MNIST Rotated (Classic)')

# Adjust the layout of the plot
fig.tight_layout()

# savefig does not create missing directories, so make sure 'figures' exists
os.makedirs('figures', exist_ok=True)

# Save the plot to the figures directory
plt.savefig('figures/rotate_average_confusion_matrix_classic.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of the (already rounded) per-task metrics, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the average metrics as a table
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

#### Novel Approach

In [None]:
# Names (without extension) of the per-task prediction CSV files
file_names = [f'rotate_task_{task}_novel' for task in range(10)]

# Per-task performance metrics, one list entry per file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Set seaborn default theme and plot context
sns.set_theme()
sns.set_context("paper")

# Running sum of the confusion matrices of all tasks
sum_cm = None

# Folder that holds the prediction CSV files
folder_path = 'outputs/prediction/'

# Process each task file in turn
for file_name in file_names:
    # Load the predictions for this task
    data = pd.read_csv(os.path.join(folder_path, file_name + '.csv'))
    y_true = data['rotate_labels']
    y_pred = data['rotate_predictions']

    # Macro-averaged metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of actual vs. predicted labels for this task
    cm = confusion_matrix(y_true, y_pred)

    # Accumulate; the element-wise sum requires every task to yield a
    # matrix of the same shape
    sum_cm = cm if sum_cm is None else sum_cm + cm

# Average the summed confusion matrices over the number of task files
average_cm = sum_cm / len(file_names)

# Wrap the averaged matrix in a DataFrame so seaborn can plot it
average_cm_df = pd.DataFrame(average_cm)

# Draw the averaged confusion matrix as an annotated heatmap
fig = plt.figure(figsize=(7, 5))
sns.heatmap(average_cm_df, annot=True, fmt='g')
plt.title('Average Confusion Matrix Across All Tasks - MNIST Rotated (Novel)')
fig.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/rotate_average_confusion_matrix_novel.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the per-task performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of each metric across tasks, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the averaged metrics as a two-column table
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

### Partitioned MNIST

#### Classic Approach

In [None]:
# Names (without extension) of the per-task prediction CSV files
file_names = [f'part_task_{task}_classic' for task in range(10)]

# Per-task performance metrics, one list entry per file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Class labels for the 10 tasks, used as the row/column labels of each
# task's confusion matrix
class_labels = [
    (0, 1), (2, 3), (4, 5), (6, 7), (8, 9),
    (0, 2), (1, 3), (4, 6), (5, 8), (7, 9)
]

# Set seaborn default theme and plot context
sns.set_theme()
sns.set_context("paper")

# One subplot per task, laid out as 5 rows x 2 columns
fig, axes = plt.subplots(5, 2, figsize=(12, 30))
axes = axes.flatten()

# Folder that holds the prediction CSV files
folder_path = 'outputs/prediction/'

# Walk the files, their class labels and their subplot axes in lockstep
for file_name, task_labels, ax in zip(file_names, class_labels, axes):
    # Load the predictions for this task
    data = pd.read_csv(os.path.join(folder_path, file_name + '.csv'))
    y_true = data['part_labels']
    y_pred = data['part_predictions']

    # Macro-averaged metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of actual vs. predicted labels for this task
    cm = confusion_matrix(y_true, y_pred)

    # Label rows/columns with the task's classes.
    # NOTE(review): assumes the matrix is 2x2 and ordered like the tuple;
    # DataFrame construction raises if the shapes disagree - confirm.
    cm_df = pd.DataFrame(cm, index=task_labels, columns=task_labels)

    # Draw the heatmap on this task's subplot and title it
    sns.heatmap(cm_df, annot=True, fmt='g', ax=ax)
    ax.set_title(f'Confusion Matrix for {file_name}')

# Adjust the layout of the plot
plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
# NOTE: the file name says "average" although the figure is the per-task
# grid; the path is kept unchanged so existing references keep working.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/part_average_confusion_matrix_classic.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the per-task performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of each metric across tasks, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the averaged metrics as a two-column table
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

#### Novel Approach

In [None]:
# Names (without extension) of the per-task prediction CSV files
file_names = [f'part_task_{task}_novel' for task in range(10)]

# Per-task performance metrics, one list entry per file
performance_metrics = {
    'Task': [],
    'Precision': [],
    'Recall': [],
    'F1': [],
    'Accuracy': []
}

# Class labels for the 10 tasks, used as the row/column labels of each
# task's confusion matrix
class_labels = [
    (0, 1), (2, 3), (4, 5), (6, 7), (8, 9),
    (0, 2), (1, 3), (4, 6), (5, 8), (7, 9)
]

# Set seaborn default theme and plot context
sns.set_theme()
sns.set_context("paper")

# One subplot per task, laid out as 5 rows x 2 columns
fig, axes = plt.subplots(5, 2, figsize=(12, 30))
axes = axes.flatten()

# Folder that holds the prediction CSV files
folder_path = 'outputs/prediction/'

# Walk the files, their class labels and their subplot axes in lockstep
for file_name, task_labels, ax in zip(file_names, class_labels, axes):
    # Load the predictions for this task
    data = pd.read_csv(os.path.join(folder_path, file_name + '.csv'))
    y_true = data['part_labels']
    y_pred = data['part_predictions']

    # Macro-averaged metrics as percentages, rounded to 2 decimal places
    precision = round(precision_score(y_true, y_pred, average='macro') * 100, 2)
    recall = round(recall_score(y_true, y_pred, average='macro') * 100, 2)
    f1 = round(f1_score(y_true, y_pred, average='macro') * 100, 2)
    accuracy = round(accuracy_score(y_true, y_pred) * 100, 2)

    # Store the metrics
    performance_metrics['Task'].append(file_name)
    performance_metrics['Precision'].append(precision)
    performance_metrics['Recall'].append(recall)
    performance_metrics['F1'].append(f1)
    performance_metrics['Accuracy'].append(accuracy)

    # Confusion matrix of actual vs. predicted labels for this task
    cm = confusion_matrix(y_true, y_pred)

    # Label rows/columns with the task's classes.
    # NOTE(review): assumes the matrix is 2x2 and ordered like the tuple;
    # DataFrame construction raises if the shapes disagree - confirm.
    cm_df = pd.DataFrame(cm, index=task_labels, columns=task_labels)

    # Draw the heatmap on this task's subplot and title it
    sns.heatmap(cm_df, annot=True, fmt='g', ax=ax)
    ax.set_title(f'Confusion Matrix for {file_name}')

# Adjust the layout of the plot
plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
# NOTE: the file name says "average" although the figure is the per-task
# grid; the path is kept unchanged so existing references keep working.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/part_average_confusion_matrix_novel.png', bbox_inches='tight', dpi=300)

# Display the plot
plt.show()

# Print the per-task performance metrics as a table
metrics_df = pd.DataFrame(performance_metrics)
print(metrics_df.to_string(index=False))

# Mean of each metric across tasks, rounded to 2 decimal places
average_metrics = {
    f'Average {metric}': round(
        sum(performance_metrics[metric]) / len(performance_metrics[metric]), 2
    )
    for metric in ('Precision', 'Recall', 'F1', 'Accuracy')
}

# Print the averaged metrics as a two-column table
average_metrics_df = pd.DataFrame(list(average_metrics.items()), columns=['Metric', 'Value'])
print(average_metrics_df.to_string(index=False))

-------------------------------------------------------------------------------------------------------------------------------

#### Code adapted from:

* https://github.com/pytorch
* https://github.com/RAIVNLab/supsup