In [None]:
import os,sys
sys.path.insert(0, os.path.abspath('..'))
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from pkldataset import PKLDataset
from helpers import set_seed, get_model, eval_model, train_model


if __name__ == '__main__':
    # Multi-seed experiment: for every seed and every training directory, a
    # fresh model from get_model() is trained and then evaluated on each
    # validation directory. The per-run value returned by eval_model is
    # collected in `results` and summarised (mean / sample std) at the end.

    # Configuration
    train_paths = [
        "../datasets/RPDC197/train_20",
        "../datasets/RPDC197/train_50",
        "../datasets/RPDC197/train_100",
        "../datasets/RPDC197/train_200",
        "../datasets/RPDC197/train_300",
        "../datasets/RPDC197/train_400",
        "../datasets/RPDC197/train_500",
        "../datasets/RPDC197/train_600",
    ]

    # Validation datasets to test each model on
    val_paths = [
        "../datasets/RPDC185/val_1000",
        "../datasets/RPDC188/val_1000",
        "../datasets/RPDC191/val_1000",
        "../datasets/RPDC194/val_1000",
        "../datasets/RPDC197/val_1000",
    ]
    seeds = [101, 202, 303, 404, 505, 606, 707, 808, 909, 1001]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()

    # The validation loaders do not depend on the seed or on the training
    # set, so build them once instead of re-creating every dataset for each
    # (seed, training path) combination.
    # NOTE(review): assumes PKLDataset construction is deterministic and does
    # not draw from the global RNG -- confirm against pkldataset.py.
    val_loaders = {
        vp: DataLoader(PKLDataset(vp), batch_size=64, shuffle=False)
        for vp in val_paths
    }

    # Container for results: {train_path: {val_path: [accs]}}
    results = {tp: {vp: [] for vp in val_paths} for tp in train_paths}

    # Multi-seed evaluation
    for seed in seeds:
        print(f"\n=== Seed {seed} ===")
        # NOTE(review): seeding happens once per seed, not once per training
        # path, so the RNG state seen by a later training path depends on the
        # runs that preceded it in this loop. Kept as-is so existing numbers
        # stay comparable.
        set_seed(seed)
        for tp in train_paths:
            print(f"-- Training on {tp}")
            # Load the training set for this run
            ds_real = PKLDataset(tp)
            train_loader = DataLoader(ds_real, batch_size=32, shuffle=True)

            # Initialize and train a fresh model, optimizer and LR schedule
            model = get_model().to(device)
            optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)
            model = train_model(model, train_loader, criterion,
                                optimizer, scheduler, num_epochs=50,
                                device=device)

            # Evaluate on each validation set, reusing the prebuilt loaders
            for vp in val_paths:
                acc = eval_model(model, val_loaders[vp], device)
                results[tp][vp].append(acc)
                print(f"[{tp} -> {vp}] Seed {seed}: Acc = {acc:.2f}%")

    # Summary
    print("\n=== Summary across seeds ===")
    for tp in train_paths:
        for vp in val_paths:
            arr = np.array(results[tp][vp])
            # ddof=1 gives the sample standard deviation across seeds
            mean, std = arr.mean(), arr.std(ddof=1)
            print(f"{tp} -> {vp}: Mean = {mean:.2f}%, Std = {std:.2f}%")


In [None]:
# Summarise `results` per validation set, keyed by the integer RPDC ID.
# Each entry holds two lists aligned with `train_paths`: the rounded mean and
# the rounded sample standard deviation (ddof=1) of the collected values.
data = {}
for vp in val_paths:
    # The parent directory of the path is e.g. "RPDC185"; drop the prefix -> 185
    rpdc_dir = vp.split('/')[-2]
    key = int(rpdc_dir.replace('RPDC', ''))
    per_train = [np.array(results[tp][vp]) for tp in train_paths]
    data[key] = {
        'mean': [round(a.mean(), 2) for a in per_train],
        'std': [round(a.std(ddof=1), 2) for a in per_train],
    }

In [None]:
# Display the summary dict built in the previous cell as this cell's output
data