In [1]:
# Passive Classification Tuning with Cross-Validation and Multiple Trials
import os
import json
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from typing import Dict
import torch
from tqdm import tqdm
import time

from alnn.models import OneHiddenMLP
from alnn.training import train_passive
from alnn.evaluation import evaluate_classification
import torch.nn as nn
from alnn.training import TrainConfig

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from torch.utils.data import TensorDataset, DataLoader

FIGURES_DIR = os.path.join('..', 'report', 'figures')
DATA_DIR = os.path.join('..', 'data')
os.makedirs(FIGURES_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)

DATASETS = ['iris', 'wine', 'breast_cancer']
LR = [1e-3, 3e-3, 1e-2, 3e-2]
WD = [0.0, 1e-5, 1e-4]
HIDDEN = [32, 64, 128]
BS = [32, 64]
N_TRIALS = 5  # Number of random seeds for each config
N_FOLDS = 5  # Number of CV folds

In [2]:
def evaluate_config_cv(dataset: str, lr: float, wd: float, hidden: int, bs: int) -> Dict[str, float]:
    """Evaluate a configuration using cross-validation across multiple trials."""
    all_metrics = []
    
    for trial in range(N_TRIALS):
        # Set random seed for reproducibility
        torch.manual_seed(42 + trial)
        np.random.seed(42 + trial)
        
        trial_metrics = []
        
        # Load data for CV splits
        if dataset == "iris":
            ds = datasets.load_iris()
        elif dataset == "wine":
            ds = datasets.load_wine()
        elif dataset == "breast_cancer":
            ds = datasets.load_breast_cancer()
        
        X, y = ds.data, ds.target
        
        # Use StratifiedKFold for classification
        skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42 + trial)
        
        for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]
            
            # Standardize features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_val_scaled = scaler.transform(X_val)
            
            # Convert to tensors
            X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
            y_train_tensor = torch.tensor(y_train, dtype=torch.long)
            X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
            y_val_tensor = torch.tensor(y_val, dtype=torch.long)
            
            # Create datasets
            train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            
            train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False)
            
            # Train model
            model = OneHiddenMLP(input_dim=X_train_scaled.shape[1], hidden_units=hidden, output_dim=len(np.unique(y)))
            loss_fn = nn.CrossEntropyLoss()
            config = TrainConfig(learning_rate=lr, weight_decay=wd, batch_size=bs, max_epochs=200, patience=20, device='cpu')
            
            train_passive(model, train_loader, val_loader, loss_fn, config)
            
            # Evaluate
            metrics = evaluate_classification(model, val_loader, device='cpu')
            trial_metrics.append(metrics)
        
        # Average across folds for this trial
        trial_avg = {}
        for key in trial_metrics[0].keys():
            trial_avg[key] = np.mean([m[key] for m in trial_metrics])
        all_metrics.append(trial_avg)
    
    # Average across trials and compute std
    final_metrics = {}
    for key in all_metrics[0].keys():
        values = [m[key] for m in all_metrics]
        final_metrics[f'{key}_mean'] = float(np.mean(values))
        final_metrics[f'{key}_std'] = float(np.std(values, ddof=1))
    
    return final_metrics

In [4]:
import json
from pathlib import Path

path = Path(f"{DATA_DIR}/passive_cls_checkpoint.json")
with path.open() as f:
    data = json.load(f)

In [8]:
len(data["results"]["iris"]['history'])

144

In [3]:
# Load checkpoint if exists
checkpoint_file = os.path.join(DATA_DIR, 'passive_cls_checkpoint.json')
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, 'r') as f:
        checkpoint = json.load(f)
    print(f"Resuming from checkpoint: {checkpoint.get('completed_configs', 0)} configs completed")
    BEST = checkpoint.get('results', {})
    
    # Determine which dataset to resume from
    dataset_configs = len(LR) * len(WD) * len(HIDDEN) * len(BS)
    completed_configs = checkpoint.get('completed_configs', 0)
    
    # Find which dataset we should resume from
    resume_dataset_idx = completed_configs // dataset_configs
    resume_config_idx = completed_configs % dataset_configs
    
    print(f"Resuming from dataset {resume_dataset_idx} ({DATASETS[resume_dataset_idx] if resume_dataset_idx < len(DATASETS) else 'completed'}), config {resume_config_idx}")
    
else:
    checkpoint = {'completed_configs': 0, 'results': {}}
    BEST = {}
    resume_dataset_idx = 0
    resume_config_idx = 0
    print("Starting fresh run")

# Calculate total configs
total_configs = len(DATASETS) * len(LR) * len(WD) * len(HIDDEN) * len(BS)
start_time = time.time()

# Process datasets starting from the resume point
for dataset_idx, dataset in enumerate(DATASETS):
    if dataset not in BEST:
        BEST[dataset] = {"best_cfg": None, "best_metric": -np.inf, "history": []}
    
    print(f"\n=== Tuning {dataset} ===")
    best_metric = BEST[dataset]["best_metric"]
    best_cfg = BEST[dataset]["best_cfg"]
    hist = BEST[dataset]["history"]
    
    dataset_configs = len(LR) * len(WD) * len(HIDDEN) * len(BS)
    
    # Determine starting point for this dataset
    if dataset_idx < resume_dataset_idx:
        # This dataset is already completed, skip it
        print(f"Skipping {dataset} (already completed)")
        continue
    elif dataset_idx == resume_dataset_idx:
        # This is the dataset we need to resume from
        start_config_idx = resume_config_idx
        print(f"Resuming {dataset} from config {start_config_idx + 1}/{dataset_configs}")
    else:
        # This dataset hasn't been started yet
        start_config_idx = 0
        print(f"Starting {dataset} from config 1/{dataset_configs}")
    
    # Create progress bar for this dataset
    pbar = tqdm(total=dataset_configs, desc=f"{dataset} configs", 
                initial=len(hist), position=0, leave=True)
    
    config_count = 0
    for lr, wd, hidden, bs in itertools.product(LR, WD, HIDDEN, BS):
        # Skip configs that were already completed
        if config_count < start_config_idx:
            config_count += 1
            continue
            
        config_idx = len(hist) + 1
        print(f'Config {config_idx}/{dataset_configs}: lr={lr}, wd={wd}, hidden={hidden}, bs={bs}')
        
        res = evaluate_config_cv(dataset, lr, wd, hidden, bs)
        res.update({"lr": lr, "wd": wd, "hidden": hidden, "bs": bs})
        hist.append(res)
        
        if res['accuracy_mean'] > best_metric:
            best_metric = res['accuracy_mean']
            best_cfg = {"lr": lr, "wd": wd, "hidden": hidden, "bs": bs}
        
        # Update progress bar
        pbar.update(1)
        pbar.set_postfix({'best_acc': f"{best_metric:.4f}"})
        
        # Save checkpoint after each config
        checkpoint['completed_configs'] += 1
        BEST[dataset] = {"best_cfg": best_cfg, "best_metric": best_metric, "history": hist}
        
        with open(checkpoint_file, 'w') as f:
            json.dump(checkpoint, f, indent=2)
        
        config_count += 1
    
    pbar.close()
    BEST[dataset] = {"best_cfg": best_cfg, "best_metric": best_metric, "history": hist}
    print(f"Best config for {dataset}: {best_cfg} (accuracy: {best_metric:.4f})")

# Save final results
with open(os.path.join(DATA_DIR, 'passive_cls_best.json'), 'w') as f:
    json.dump(BEST, f, indent=2)

# Clean up checkpoint file
if os.path.exists(checkpoint_file):
    os.remove(checkpoint_file)

total_time = time.time() - start_time
print(f"\nTotal time: {total_time/3600:.2f} hours")
print(f"Average time per config: {total_time/total_configs:.2f} seconds")


Resuming from checkpoint: 185 configs completed
Resuming from dataset 2 (breast_cancer), config 41

=== Tuning iris ===
Skipping iris (already completed)

=== Tuning wine ===
Skipping wine (already completed)

=== Tuning breast_cancer ===
Resuming breast_cancer from config 42/72


breast_cancer configs:   0%|          | 0/72 [00:00<?, ?it/s]

Config 1/72: lr=0.01, wd=0.0, hidden=128, bs=64


KeyboardInterrupt: 

In [None]:
# Plot best accuracy per dataset with error bars
plt.figure(figsize=(8, 5))
datasets_plot = []
means_plot = []
stds_plot = []

for dataset in DATASETS:
    best_idx = None
    best_acc = -np.inf
    for i, h in enumerate(BEST[dataset]['history']):
        if h['accuracy_mean'] > best_acc:
            best_acc = h['accuracy_mean']
            best_idx = i
    
    datasets_plot.append(dataset)
    means_plot.append(BEST[dataset]['history'][best_idx]['accuracy_mean'])
    stds_plot.append(BEST[dataset]['history'][best_idx]['accuracy_std'])

plt.errorbar(datasets_plot, means_plot, yerr=stds_plot, fmt='o', capsize=5, capthick=2)
plt.ylabel('Accuracy (best ± std)')
plt.title('Passive Classification Best Accuracy (CV + Multiple Trials)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_DIR, 'passive_cls_best_accuracy.png'), dpi=200)
plt.show()

print(f'\nSaved passive classification tuning results to {FIGURES_DIR}')
print(f'Used {N_TRIALS} trials × {N_FOLDS} folds = {N_TRIALS * N_FOLDS} evaluations per config')