In [42]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from tabulate import tabulate

# Module-level accuracy table (percent). train_and_evaluate writes
# scores[i][j]: the accuracy on evaluation set j after training round i.
scores = np.zeros(shape=(20, 20), dtype=np.float64)

def fit(X, y, n_classes, feature_dim):
    """Build one mean-vector prototype per class from labelled data.

    Args:
        X: Feature array; rows are selected with the boolean mask ``y == label``.
        y: Label array aligned with the rows of X.
        n_classes: Number of classes; labels 0 .. n_classes - 1 are scanned.
        feature_dim: Length of each prototype vector.

    Returns:
        Tuple ``(prototypes, class_counts)``. Classes with no samples keep an
        all-zero prototype and a count of zero.
    """
    prototypes = np.zeros((n_classes, feature_dim))
    class_counts = np.zeros(n_classes)

    for label in range(n_classes):
        members = X[y == label]
        if members.size == 0:
            # No training samples for this class: leave the zero defaults.
            continue
        prototypes[label] = members.mean(axis=0)
        class_counts[label] = members.shape[0]

    return prototypes, class_counts

def predict(X, prototypes):
    """Assign each row of X the index of its closest prototype.

    Closeness is the Euclidean distance computed by
    ``euclidean_distances(X, prototypes)``; the returned array holds, per
    row of X, the index of the prototype with the smallest distance.
    """
    pairwise = euclidean_distances(X, prototypes)
    nearest = pairwise.argmin(axis=1)
    return nearest

def computesimilarity(prototype, new_data_mean):
    """Return the cosine similarity between two vectors.

    Args:
        prototype: First vector.
        new_data_mean: Second vector, same length as ``prototype``.

    Returns:
        The dot product of the two L2-normalised vectors. If either vector
        has zero norm the direction is undefined, so 0.0 is returned instead
        of dividing by zero and propagating NaN.
    """
    prototype_norm = np.linalg.norm(prototype)
    new_data_norm = np.linalg.norm(new_data_mean)

    # Guard against division by zero for all-zero vectors (e.g. a prototype
    # of a class that has never received any samples).
    if prototype_norm == 0 or new_data_norm == 0:
        return 0.0

    return np.dot(prototype / prototype_norm, new_data_mean / new_data_norm)

def update(X, y_pred, prototypes, class_counts, n_classes, k=-0.5):
    """Update prototypes with pseudo-labels.

    For every class that received at least one pseudo-label, the
    count-weighted running mean of the old prototype and the new samples is
    computed and blended with the old prototype:

        new = (1 - k) * old + k * running_mean

    Args:
        X: Feature array; rows are selected with the mask ``y_pred == i``.
        y_pred: Pseudo-labels aligned with the rows of X.
        prototypes: Per-class prototype array. Modified in place.
        class_counts: Per-class sample counts. Modified in place.
        n_classes: Number of classes; labels 0 .. n_classes - 1 are scanned.
        k: Mixing weight (hyperparameter to tune). The default of -0.5 keeps
            the previously hard-coded value. ``k=1`` yields the plain
            running mean, ``k=0`` leaves prototypes unchanged, and a
            negative value moves the prototype away from the running mean.

    Returns:
        The same ``(prototypes, class_counts)`` objects that were passed in,
        after the in-place update.
    """
    for i in range(n_classes):
        class_data = X[y_pred == i]
        if class_data.size == 0:
            # No sample was pseudo-labelled as class i: nothing to update.
            continue

        n_new = len(class_data)
        # Mean over all samples seen so far for this class, old and new.
        running_mean = (
            class_counts[i] * prototypes[i] + np.sum(class_data, axis=0)
        ) / (class_counts[i] + n_new)

        prototypes[i] = (1 - k) * prototypes[i] + k * running_mean
        class_counts[i] += n_new

    return prototypes, class_counts

def load_dataset(file_path, i=0, is_eval=False):
    """Load dataset from a .npz file.

    Args:
        file_path: Path to an ``.npz`` archive containing an ``'x'`` array
            and, when labels are requested, a ``'y'`` array.
        i: Index of the dataset in the training sequence. Only index 0 is
            loaded together with its labels.
        is_eval: When true, labels are always returned regardless of ``i``.

    Returns:
        ``(x, y)`` if ``is_eval`` is true or ``i == 0``; otherwise only ``x``.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once the requested arrays have been read into memory.
    with np.load(file_path) as data:
        if is_eval or i == 0:
            return data['x'], data['y']
        return data['x']

def train_and_evaluate(data_paths, n_classes, feature_dim):
    """Train prototypes sequentially and record accuracy after each round.

    Round 0 fits prototypes on the first (labelled) training file. Every
    later round predicts pseudo-labels for its training file with the current
    prototypes and updates them. After each round ``i`` the prototypes are
    evaluated on evaluation sets ``0 .. i`` and the accuracy in percent is
    stored in the module-level ``scores[i][j]``. The table is then shown
    with ``display``.

    Args:
        data_paths: Sequence of ``(train_file, eval_file)`` path pairs.
        n_classes: Number of classes.
        feature_dim: Length of each prototype vector.

    Returns:
        The prototype array after the final round.

    Raises:
        IndexError: If ``data_paths`` has more entries than ``scores`` can
            hold along either axis.
    """
    # Fail before any file is loaded rather than partway through training.
    if len(data_paths) > min(scores.shape):
        raise IndexError(
            f"{len(data_paths)} datasets do not fit into the scores table "
            f"of shape {scores.shape}"
        )

    # Clear results left over from a previous call so that stale accuracies
    # are never mixed into this run's table.
    scores.fill(0.0)

    prototypes, class_counts = np.zeros((n_classes, feature_dim)), np.zeros(n_classes)

    # Evaluation sets loaded so far. Keeping them avoids re-reading every
    # earlier file from disk on each round, at the cost of holding them in
    # memory for the duration of the run.
    eval_sets = []

    for i, (train_file, eval_file) in enumerate(data_paths):
        if i == 0:
            # Train the first model on D1, the only labelled training set.
            X_train, y_train = load_dataset(train_file, i)
            prototypes, class_counts = fit(X_train, y_train, n_classes, feature_dim)
        else:
            X_train = load_dataset(train_file, i)
            # Predict pseudo-labels for the unlabelled training data.
            y_pred_train = predict(X_train, prototypes)
            # Update the model using the pseudo-labelled data.
            prototypes, class_counts = update(
                X_train, y_pred_train, prototypes, class_counts, n_classes
            )

        # Evaluate the model on all evaluation datasets up to the current one.
        eval_sets.append(load_dataset(eval_file, is_eval=True))
        for j, (X_eval, y_eval) in enumerate(eval_sets):
            y_pred_eval = predict(X_eval, prototypes)
            accuracy = np.mean(y_pred_eval == y_eval)
            scores[i][j] = accuracy * 100

    n_rows, n_cols = scores.shape
    df = pd.DataFrame(scores)
    df.index = [f"F_{row + 1}" for row in range(n_rows)]
    df.columns = [f"D{col + 1}" for col in range(n_cols)]
    # NOTE(review): `display` is not defined in this file; presumably the
    # IPython/Jupyter built-in. Confirm before running as a plain script.
    display(df.iloc[:, :20])

    return prototypes

# (train, eval) file pairs for ds1 through ds20, in training order.
dataset_paths = [
    (f"ds{idx}_train.npz", f"ds{idx}_eval.npz") for idx in range(1, 21)
]

# Problem size: number of classes and the length of each feature vector.
# Adjust feature_dim to match the dataset.
n_classes = 10
feature_dim = 768

# Run the whole train/evaluate sequence; returns the final prototypes.
models = train_and_evaluate(
    data_paths=dataset_paths,
    n_classes=n_classes,
    feature_dim=feature_dim,
)


Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D20
F_1,95.84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_2,95.8,95.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_3,95.8,95.84,95.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_4,95.72,95.84,95.28,95.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_5,95.88,95.76,95.36,95.92,96.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_6,95.88,95.88,95.44,95.84,96.08,96.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_7,95.88,95.88,95.48,95.96,96.08,96.2,95.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_8,95.92,95.88,95.48,95.92,96.08,96.2,95.48,95.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_9,95.92,95.92,95.44,95.92,96.12,96.2,95.44,95.56,96.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F_10,96.0,95.96,95.48,95.96,96.12,96.32,95.44,95.56,96.48,96.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
