# MSE per Seed

In [2]:
# %% [markdown]
# # CIFAR-10 Retrained vs Redistributed MSE Table

# %%
import os
import numpy as np
import pandas as pd
from lib.utils import model_results_from_npz  # Use your existing function

# %%
# ----- Config -----
# Label names; a name's position in the list is the class index used below.
CLASSES = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
NUM_CLASSES = 10  # expected to equal len(CLASSES)

# Seed and epoch count are embedded in the result file names that get loaded.
SEED = 969
EPOCHS = 50

# Directories holding the .npz results of the two model variants being compared.
DIR_RETRAIN = "../results/cifar10/"
DIR_REDIS = "../results/redis_cifar10/"

# %%
# ----- Collect results per removed class -----
# One summary row is produced per removed class; a class is skipped when
# either of its two result files is absent.
rows = []

for removed_idx in range(NUM_CLASSES):
    removed_name = CLASSES[removed_idx]

    # The two files differ only in directory and suffix (_r<idx> vs _rd<idx>).
    retrain_file = f"cifar_resnet_s{SEED}_e{EPOCHS}_r{removed_idx}.npz"
    redis_file = f"cifar_resnet_s{SEED}_e{EPOCHS}_rd{removed_idx}.npz"
    retrain_path = os.path.join(DIR_RETRAIN, retrain_file)
    redis_path = os.path.join(DIR_REDIS, redis_file)

    # A comparison needs both files.
    if not os.path.exists(retrain_path) or not os.path.exists(redis_path):
        print(f"Skipping missing class {removed_name}")
        continue

    # Load both result sets through the project helper.
    model_retrain = model_results_from_npz(retrain_path, NUM_CLASSES)
    model_redis = model_results_from_npz(redis_path, NUM_CLASSES)

    # Stack confidences[0] .. confidences[NUM_CLASSES - 1] row-wise, in index
    # order, so the rows of the two matrices line up.
    # NOTE(review): assumes both files store their samples in the same order.
    conf_retrain = np.vstack(
        [model_retrain.confidences[cls_idx] for cls_idx in range(NUM_CLASSES)]
    )
    conf_redis = np.vstack(
        [model_redis.confidences[cls_idx] for cls_idx in range(NUM_CLASSES)]
    )

    # Averaging the squared difference over axis 1 gives one MSE per stacked row.
    squared_error = (conf_retrain - conf_redis) ** 2
    mse_per_sample = squared_error.mean(axis=1)

    # Reduce the per-row MSE vector to scalar summary statistics.
    rows.append({
        "removed_class": removed_idx,
        "removed_name": removed_name,
        "mean_mse": mse_per_sample.mean(),
        "variance_mse": mse_per_sample.var(),
        "stdev_mse": mse_per_sample.std(),
        "min_mse": mse_per_sample.min(),
        "max_mse": mse_per_sample.max(),
    })

# %%
# ----- Build DataFrame -----
# One row per removed class that had both result files available.
df = pd.DataFrame(rows)

# Save the table as CSV. The target directory is created first so the write
# does not fail with FileNotFoundError when the folder does not exist yet.
csv_path = f"../analytics/CIFAR10/mse/redis_vs_retrain_mse_s{SEED}.csv"
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df.to_csv(csv_path, index=False)

# Jupyter renders only the last expression of a cell, so the frame has to come
# after the save to actually be displayed.
df


# MSE All Seeds

In [1]:
# %% [markdown]
# # CIFAR-10 Retrained vs Redistributed MSE Table (All Seeds)

# %%
import os
import numpy as np
import pandas as pd
from lib.utils import model_results_from_npz  # Use your existing function

# %%
# ----- Config -----
# Label names; a name's position in the list is the class index used below.
CLASSES = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
NUM_CLASSES = 10  # expected to equal len(CLASSES)

# Every seed listed here is looked up on disk; the seed and the epoch count
# are embedded in the result file names.
SEEDS = [42, 602, 311, 637, 800, 543, 969, 122, 336, 93]  # Example seeds
EPOCHS = 50

# Directories holding the .npz results of the two model variants being compared.
DIR_RETRAIN = "../results/cifar10/"
DIR_REDIS = "../results/redis_cifar10/"

# %%
# ----- Collect results per removed class and per seed -----
# One summary row is produced per (seed, removed class) pair; a pair is
# skipped when either of its two result files is absent.
rows = []

for seed in SEEDS:
    print(f"Processing seed {seed}...")
    for removed_idx in range(NUM_CLASSES):
        removed_name = CLASSES[removed_idx]

        # The two files differ only in directory and suffix (_r<idx> vs _rd<idx>).
        retrain_file = f"cifar_resnet_s{seed}_e{EPOCHS}_r{removed_idx}.npz"
        redis_file = f"cifar_resnet_s{seed}_e{EPOCHS}_rd{removed_idx}.npz"
        retrain_path = os.path.join(DIR_RETRAIN, retrain_file)
        redis_path = os.path.join(DIR_REDIS, redis_file)

        # A comparison needs both files.
        if not os.path.exists(retrain_path) or not os.path.exists(redis_path):
            print(f"  Skipping missing class {removed_name}")
            continue

        # Load both result sets through the project helper.
        model_retrain = model_results_from_npz(retrain_path, NUM_CLASSES)
        model_redis = model_results_from_npz(redis_path, NUM_CLASSES)

        # Stack confidences[0] .. confidences[NUM_CLASSES - 1] row-wise, in
        # index order, so the rows of the two matrices line up.
        # NOTE(review): assumes both files store their samples in the same order.
        conf_retrain = np.vstack(
            [model_retrain.confidences[cls_idx] for cls_idx in range(NUM_CLASSES)]
        )
        conf_redis = np.vstack(
            [model_redis.confidences[cls_idx] for cls_idx in range(NUM_CLASSES)]
        )

        # Averaging the squared difference over axis 1 gives one MSE per stacked row.
        squared_error = (conf_retrain - conf_redis) ** 2
        mse_per_sample = squared_error.mean(axis=1)

        # Reduce the per-row MSE vector to scalar summary statistics.
        rows.append({
            "seed": seed,
            "removed_class": removed_idx,
            "removed_name": removed_name,
            "mean_mse": mse_per_sample.mean(),
            "min_mse": mse_per_sample.min(),
            "max_mse": mse_per_sample.max(),
        })

# %%
# ----- Build DataFrame -----
# One row per (seed, removed class) pair that had both result files available.
df_all_seeds = pd.DataFrame(rows)

# Save the table as CSV. The target directory is created first so the write
# does not fail with FileNotFoundError after all seeds have been processed.
csv_path = "../analytics/CIFAR10/mse/redis_vs_retrain_mse_all_seeds.csv"
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df_all_seeds.to_csv(csv_path, index=False)

# Jupyter renders only the last expression of a cell, so the frame has to come
# after the save to actually be displayed.
df_all_seeds


Processing seed 42...
Processing seed 602...
Processing seed 311...
Processing seed 637...
Processing seed 800...
Processing seed 543...
Processing seed 969...
Processing seed 122...
Processing seed 336...
Processing seed 93...
