# MSE per Seed

In [1]:
# %% [markdown]
# # CIFAR-100 Retrained vs Redistributed MSE Table (Per-Seed)

# %%
import os
import numpy as np
import pandas as pd
from lib.utils import model_results_from_npz  # existing function

# %%
# ----- Config -----
# Experiment parameters for the single-seed comparison.
NUM_CLASSES = 100
SEED = 602  # change as needed
EPOCHS = 50

# Placeholder labels "class0" .. "class99".
# Replace with actual CIFAR-100 class names.
CLASSES = ["class" + str(i) for i in range(NUM_CLASSES)]

# Indices of the classes whose removal is evaluated.
REMOVED_CLASSES = [
    14, 23, 35, 49, 53,
    61, 68, 72, 88, 97,
]

# Alternative, larger removal set (40 indices) kept for reference;
# uncomment to use it instead of the list above.
# REMOVED_CLASSES = [
#     4, 7, 6, 11, 14, 15, 19, 24, 23, 25, 29, 30,
#     31, 36, 35, 39, 44, 41, 49, 48, 51, 53, 59, 57, 60, 61,
#     68, 67, 72, 71, 76, 77, 80, 84, 88, 85, 92, 94, 99, 97]  # CIFAR-100

# Directories holding the .npz result files of the two approaches.
DIR_RETRAIN = "../results/cifar100/"
DIR_REDIS = "../results/redis_cifar100/"

# %%
# ----- Collect MSE results per removed class -----
# For each removed class, load the retrained and the redistributed results
# for SEED and summarise the per-sample MSE between their confidences.
rows = []

for class_idx in REMOVED_CLASSES:
    class_label = CLASSES[class_idx]

    path_retrain = os.path.join(
        DIR_RETRAIN, f"cifar100_resnet18_s{SEED}_e{EPOCHS}_r{class_idx}.npz"
    )
    path_redis = os.path.join(
        DIR_REDIS, f"cifar_resnet_s{SEED}_e{EPOCHS}_rd{class_idx}.npz"
    )

    # Both result files are required; otherwise the class is left out of the table.
    if not os.path.exists(path_retrain) or not os.path.exists(path_redis):
        print(f"Skipping missing class {class_label}")
        continue

    # Load models
    retrained = model_results_from_npz(path_retrain, NUM_CLASSES)
    redistributed = model_results_from_npz(path_redis, NUM_CLASSES)

    # Stack the entries confidences[0] .. confidences[NUM_CLASSES - 1] row-wise.
    # NOTE(review): assumes both results stack to the same shape — confirm.
    stacked_retrain = np.vstack([retrained.confidences[k] for k in range(NUM_CLASSES)])
    stacked_redis = np.vstack([redistributed.confidences[k] for k in range(NUM_CLASSES)])

    # Mean squared difference along axis 1: one scalar per stacked row.
    squared_diff = (stacked_retrain - stacked_redis) ** 2
    mse_per_sample = squared_diff.mean(axis=1)

    summary = {"removed_class": class_idx, "removed_name": class_label}
    summary["mean_mse"] = mse_per_sample.mean()
    summary["variance_mse"] = mse_per_sample.var()
    summary["stdev_mse"] = mse_per_sample.std()
    summary["min_mse"] = mse_per_sample.min()
    summary["max_mse"] = mse_per_sample.max()
    rows.append(summary)

# %%
# ----- Build DataFrame -----
# One row per removed class for which both result files were found.
df = pd.DataFrame(rows)

# Save the table next to the other analytics outputs.
csv_path = f"../analytics/CIFAR100/mse/redis_vs_retrain_mse_s{SEED}.csv"
if df.empty:
    # Every class was skipped: keep any existing CSV instead of overwriting
    # it with an empty table.
    print(f"No MSE rows collected; not writing {csv_path}")
else:
    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

# Last expression of the cell so the notebook actually renders the table.
df


# MSE All Seeds

In [4]:
# %% [markdown]
# # CIFAR-100 Retrained vs Redistributed MSE Table (Stacked Seeds)

# %%
import os
import numpy as np
import pandas as pd
from lib.utils import model_results_from_npz

# %%
# ----- Config -----
# Experiment parameters for the multi-seed comparison.
NUM_CLASSES = 100
EPOCHS = 50

# Seeds whose result files are stacked into one table.
SEEDS = [
    42, 602, 311, 637, 800,
    543, 969, 122, 336, 93,
]

# Placeholder labels "class0" .. "class99".
# Replace with actual CIFAR-100 class names.
CLASSES = ["class" + str(i) for i in range(NUM_CLASSES)]

# Indices of the classes whose removal is evaluated.
REMOVED_CLASSES = [
    14, 23, 35, 49, 53,
    61, 68, 72, 88, 97,
]

# Directories holding the .npz result files of the two approaches.
DIR_RETRAIN = "../results/cifar100/"
DIR_REDIS = "../results/redis_cifar100/"

# %%
# ----- Collect MSE across all seeds -----
# Every (seed, removed class) pair with both result files present yields one
# row summarising the per-sample MSE between the two sets of confidences.
rows = []

for seed in SEEDS:
    print(f"Processing seed {seed}...")
    for class_idx in REMOVED_CLASSES:
        class_label = CLASSES[class_idx]

        path_retrain = os.path.join(
            DIR_RETRAIN, f"cifar100_resnet18_s{seed}_e{EPOCHS}_r{class_idx}.npz"
        )
        path_redis = os.path.join(
            DIR_REDIS, f"cifar_resnet_s{seed}_e{EPOCHS}_rd{class_idx}.npz"
        )

        # Both result files are required; otherwise the pair is skipped.
        if not os.path.exists(path_retrain) or not os.path.exists(path_redis):
            print(f"  Skipping missing class {class_label}")
            continue

        retrained = model_results_from_npz(path_retrain, NUM_CLASSES)
        redistributed = model_results_from_npz(path_redis, NUM_CLASSES)

        # Stack the entries confidences[0] .. confidences[NUM_CLASSES - 1] row-wise.
        # NOTE(review): assumes both results stack to the same shape — confirm.
        stacked_retrain = np.vstack([retrained.confidences[k] for k in range(NUM_CLASSES)])
        stacked_redis = np.vstack([redistributed.confidences[k] for k in range(NUM_CLASSES)])

        # Mean squared difference along axis 1: one scalar per stacked row.
        mse_per_sample = np.mean((stacked_retrain - stacked_redis) ** 2, axis=1)

        rows.append({
            "seed": seed,
            "removed_class": class_idx,
            "removed_name": class_label,
            "mean_mse": mse_per_sample.mean(),
            "min_mse": mse_per_sample.min(),
            "max_mse": mse_per_sample.max(),
        })

# %%
# ----- Build DataFrame -----
# One row per (seed, removed class) pair for which both result files were found.
df_all = pd.DataFrame(rows)

# Save the stacked table next to the other analytics outputs.
csv_path_all = "../analytics/CIFAR100/mse/redis_vs_retrain_mse_all_seeds.csv"
if df_all.empty:
    # Nothing was collected: keep any existing CSV instead of overwriting
    # it with an empty table.
    print(f"No MSE rows collected; not writing {csv_path_all}")
else:
    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(csv_path_all), exist_ok=True)
    df_all.to_csv(csv_path_all, index=False)

# Last expression of the cell so the notebook actually renders the table.
df_all


Processing seed 42...
Processing seed 602...
Processing seed 311...
Processing seed 637...
Processing seed 800...
Processing seed 543...
Processing seed 969...
Processing seed 122...
Processing seed 336...
Processing seed 93...
