In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader

# Load dataset features and labels
def load_dataset(path, labeled=True):
    data = torch.load(path)
    if labeled:
        return data['features'], data['labels']
    return data['features']

# Calculate class prototypes
def calculate_prototypes(features, labels, num_classes=10):
    prototypes = []
    for c in range(num_classes):
        # Ensure we're working with the correct feature dimension
        class_features = features[labels == c]
        if len(class_features) > 0:
            prototypes.append(class_features.mean(dim=0))
        else:
            # If no features for a class, create a zero vector of correct dimension
            prototypes.append(torch.zeros(features.shape[1]))
    return torch.stack(prototypes)

# Prototype-based prediction
def predict(features, prototypes):
    # Ensure features are float type
    features = features.float()
    prototypes = prototypes.float()
    
    # Use cosine similarity instead of Euclidean distance for high-dimensional features
    similarities = F.cosine_similarity(features.unsqueeze(1), prototypes.unsqueeze(0), dim=2)
    return torch.argmax(similarities, dim=1)

# Prototype contrastive learning loss
def prototype_contrastive_loss(features, pseudo_labels, prototypes, temperature=0.1):
    # Ensure features and prototypes are float
    features = features.float()
    prototypes = prototypes.float()
    
    # Compute cosine similarities
    logits = F.cosine_similarity(features.unsqueeze(1), prototypes.unsqueeze(0), dim=2) / temperature
    loss = F.cross_entropy(logits, pseudo_labels)
    return loss

# Knowledge distillation loss
def knowledge_distillation_loss(student_logits, teacher_logits, temperature=2.0):
    teacher_probs = F.softmax(teacher_logits / temperature, dim=1)
    student_probs = F.log_softmax(student_logits / temperature, dim=1)
    return F.kl_div(student_probs, teacher_probs, reduction="batchmean") * (temperature ** 2)

# Training on D1
def train_labeled_dataset(features, labels, num_classes=10):
    prototypes = calculate_prototypes(features, labels, num_classes)
    return prototypes

# Update prototypes using PCL and KD
def continual_learning(features, prototypes, prev_model, pseudo_labels, num_classes=10, lr=0.01):
    # Ensure all tensors are float
    features = features.float()
    prototypes = prototypes.float()
    
    # Make prototypes a learnable parameter
    prototypes = torch.nn.Parameter(prototypes.clone(), requires_grad=True)
    optimizer = torch.optim.SGD([prototypes], lr=lr)
    
    for _ in range(50):  # Increased iterations for better convergence
        optimizer.zero_grad()
        
        # Prototype Contrastive Learning Loss
        pcl_loss = prototype_contrastive_loss(features, pseudo_labels, prototypes)
        
        # Knowledge Distillation Loss (if previous model exists)
        if prev_model is not None:
            with torch.no_grad():
                teacher_logits = F.cosine_similarity(features.unsqueeze(1), prev_model.unsqueeze(0), dim=2)
            student_logits = F.cosine_similarity(features.unsqueeze(1), prototypes.unsqueeze(0), dim=2)
            kd_loss = knowledge_distillation_loss(student_logits, teacher_logits)
            loss = pcl_loss + kd_loss
        else:
            loss = pcl_loss
        
        loss.backward()
        optimizer.step()
    
    return prototypes.detach()

# Evaluate the model
def evaluate(features, labels, prototypes):
    predictions = predict(features, prototypes)
    accuracy = (predictions == labels).float().mean().item()
    return accuracy

# Main continual learning pipeline
def continual_learning_pipeline(data_path, eval_data_path, num_datasets=20, num_classes=10):
    accuracy_matrix = np.zeros((num_datasets, num_datasets))
    prev_prototypes = None
    # Load labeled dataset (D1)
    train_features, train_labels = load_dataset(f"{data_path}/features_dataset_1.pth")
    
    # Ensure features are float and have the expected 1280-dimensional shape
    train_features = train_features.float()
    assert train_features.shape[1] == 1280, f"Expected 1280 features, got {train_features.shape[1]}"
    
    prototypes = train_labeled_dataset(train_features, train_labels, num_classes)
    
    for i in range(1, num_datasets + 1):
        if i > 1 and i<11:  # Unlabeled datasets
            train_features = load_dataset(f"{data_path}/features_dataset_{i}.pth", labeled=False)
            
            # Ensure features are float and have the expected 1280-dimensional shape
            train_features = train_features.float()
            assert train_features.shape[1] == 1280, f"Expected 1280 features, got {train_features.shape[1]}"
            
            pseudo_labels = predict(train_features, prototypes)
            prototypes = continual_learning(train_features, prototypes, prev_prototypes, pseudo_labels, num_classes)
        elif i>=11:
            train_features = load_dataset(f"{data_path+'2'}/features_dataset_{i-10}.pth", labeled=False)
            
            # Ensure features are float and have the expected 1280-dimensional shape
            train_features = train_features.float()
            assert train_features.shape[1] == 1280, f"Expected 1280 features, got {train_features.shape[1]}"
            
            pseudo_labels = predict(train_features, prototypes)
            prototypes = continual_learning(train_features, prototypes, prev_prototypes, pseudo_labels, num_classes)
        prev_prototypes = prototypes.clone()
        
        # Evaluate on all evaluation datasets from 1 to i
        for j in range(1, i + 1):
            if j<11:
                eval_features, eval_labels = load_dataset(f"{eval_data_path}/eval_features_dataset_{j}.pth")
            else:
                eval_features, eval_labels = load_dataset(f"{eval_data_path+'2'}/eval_features_dataset_{j-10}.pth")
            # Ensure features are float and have the expected 1280-dimensional shape
            eval_features = eval_features.float()
            assert eval_features.shape[1] == 1280, f"Expected 1280 features, got {eval_features.shape[1]}"
            
            accuracy_matrix[i - 1, j - 1] = evaluate(eval_features, eval_labels, prototypes)
    
    return accuracy_matrix

# Run pipeline for both datasets (saved_data and saved_data2)
data_path_1 = "saved_data"
eval_data_path_1 = "saved_data"
accuracy_matrix_1 = continual_learning_pipeline(data_path_1, eval_data_path_1)

data_path_2 = "saved_data2"
eval_data_path_2 = "saved_data2"
# accuracy_matrix_2 = continual_learning_pipeline(data_path_2, eval_data_path_2)

print("Accuracy Matrix for Saved Data 1:\n", accuracy_matrix_1)
print("Accuracy Matrix for Saved Data 2:\n", accuracy_matrix_2)

Accuracy Matrix for Saved Data 1:
 [[0.8488     0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.8488     0.85000002 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.8488     0.85039997 0.84600002 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.8484     0.85039997 0.84600002 0.85039997 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.8488     0.85039997 0.84600002 0.85039997 0.84960002 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0

NameError: name 'accuracy_matrix_2' is not defined