In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from nn.models import OneHiddenMLP
from nn.training import train_passive, TrainConfig
from nn.evaluation import evaluate_regression
from nn.experiments import ActiveConfig, run_active_regression
from nn.strategies import uncertainty_sampling, sensitivity_sampling, UncertaintySamplingConfig

from typing import Dict

# --- Experiment grid --------------------------------------------------------
DATASETS = ['diabetes', 'linnerud', 'california']
METRICS = ['rmse', 'mae', 'r2']
BUDGETS = [200]
UNCERTAINTY_METHODS = ['entropy', 'margin', 'least_confidence']

# --- Locations (relative to the notebook's working directory) ---------------
DATA_DIR = os.path.join('..', 'data')
SAVE_DIR = os.path.join('..', 'report', 'figures')
# Figures and the summary CSV are written here, so make sure it exists.
os.makedirs(SAVE_DIR, exist_ok=True)

# --- Previously saved tuning results -----------------------------------------
# Results of the uncertainty-sampling tuning runs.
with open(os.path.join(DATA_DIR, 'reg_uncertainty_results.json'), 'r') as f:
    unc_results = json.load(f)

# Results of the sensitivity-sampling tuning runs.
with open(os.path.join(DATA_DIR, 'reg_sensitivity_results.json'), 'r') as f:
    sen_results = json.load(f)

# Passive-learning tuning results; `best_cfg` per dataset is read from here.
with open(os.path.join(DATA_DIR, 'passive_reg_best.json'), 'r') as f:
    pas_results = json.load(f)


In [None]:
def get_data_splits_regression(dataset: str, test_size: float = 0.2, random_state: int = 42):
    """Load a scikit-learn regression dataset and split it into train+val / test.

    Args:
        dataset: One of ``"diabetes"``, ``"linnerud"`` or ``"california"``.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
        random_state: Value forwarded to ``train_test_split`` as ``random_state``
            so the split is the same on every call.

    Returns:
        Tuple ``(X_train_val, X_test, y_train_val, y_test)``; features and
        targets are cast to ``float32`` before splitting.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    if dataset == "diabetes":
        ds = datasets.load_diabetes()
        y = ds.target
    elif dataset == "linnerud":
        ds = datasets.load_linnerud()
        # The target is 2-D here; only its first column is used as the label.
        y = ds.target[:, 0]
    elif dataset == "california":
        ds = datasets.fetch_california_housing()
        y = ds.target
    else:
        # Fail loudly instead of hitting an UnboundLocalError on `ds` below.
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected 'diabetes', 'linnerud' or 'california'."
        )

    X = ds.data.astype(np.float32)
    y = y.astype(np.float32)

    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return X_train_val, X_test, y_train_val, y_test

def evaluate_passive_test_regression(dataset: str, budget: int, seed: int = 42) -> Dict[str, float]:
    """Train the passive baseline on the train+val split and score it on the test split.

    Hyperparameters (``lr``, ``wd``, ``hidden``, ``bs``) are read from
    ``pas_results[dataset]['best_cfg']``. Features are standardised with a
    ``StandardScaler`` fitted on the train+val split only.

    Args:
        dataset: Dataset name understood by ``get_data_splits_regression``.
        budget: Not used. The model is always trained on the whole train+val
            split; the parameter is kept so existing call sites keep working.
        seed: Passed to ``torch.manual_seed`` before the model and the
            shuffling loader are created, so repeated calls start from the
            same torch RNG state.

    Returns:
        The metrics object returned by ``evaluate_regression`` for the test
        loader (the notebook reads its ``'rmse'``, ``'mae'`` and ``'r2'`` keys).
    """
    # Best hyperparameters found during passive tuning.
    best_cfg = pas_results[dataset]['best_cfg']
    lr, wd = best_cfg['lr'], best_cfg['wd']
    hidden, bs = best_cfg['hidden'], best_cfg['bs']

    # Model initialisation and DataLoader shuffling were previously unseeded.
    # NOTE(review): this only helps if the nn.* helpers draw from torch's
    # global RNG -- confirm.
    torch.manual_seed(seed)

    X_train_val, X_test, y_train_val, y_test = get_data_splits_regression(dataset)

    # Fit the scaler on training data only, then apply it to the test data.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_val)
    X_test_scaled = scaler.transform(X_test)

    # Targets get a trailing dimension so they match the model's single output.
    train_dataset = TensorDataset(
        torch.tensor(X_train_scaled, dtype=torch.float32),
        torch.tensor(y_train_val, dtype=torch.float32).unsqueeze(-1),
    )
    test_dataset = TensorDataset(
        torch.tensor(X_test_scaled, dtype=torch.float32),
        torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1),
    )

    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False)

    model = OneHiddenMLP(input_dim=X_train_scaled.shape[1], hidden_units=hidden, output_dim=1)
    loss_fn = nn.MSELoss()
    config = TrainConfig(learning_rate=lr, weight_decay=wd, batch_size=bs, max_epochs=200, patience=20, device='cpu')

    # NOTE(review): the test loader is handed to train_passive as its second
    # loader. If that loader drives early stopping (TrainConfig has a
    # `patience`), the test set influences model selection -- confirm and, if
    # so, pass a validation split carved out of the training data instead.
    train_passive(model, train_loader, test_loader, loss_fn, config)

    return evaluate_regression(model, test_loader, device='cpu')

def evaluate_active_test_regression(
    dataset: str,
    strategy: str,
    method: str,
    budget: int,
    seed: int = 42,
) -> Dict[str, float]:
    """Simulate pool-based active learning on train+val and score the result on the test split.

    Starting from a random labeled set, the function repeatedly trains a fresh
    ``OneHiddenMLP`` on the labeled points, asks the chosen strategy for new
    points from the unlabeled pool, and moves them to the labeled set until
    ``budget`` labels (or the whole pool) are in use. A final model is then
    trained on the labeled set and evaluated on the test split.

    Args:
        dataset: Dataset name understood by ``get_data_splits_regression``.
        strategy: ``'uncertainty'`` or ``'sensitivity'``.
        method: Forwarded to ``UncertaintySamplingConfig(method=...)`` when
            ``strategy == 'uncertainty'``; ignored for ``'sensitivity'``.
        budget: Maximum number of labeled training points.
        seed: Passed to ``torch.manual_seed`` before the initial labeled set is
            drawn, so every strategy starts from the same torch RNG state.

    Returns:
        The metrics object returned by ``evaluate_regression`` for the test
        loader (the notebook reads its ``'rmse'``, ``'mae'`` and ``'r2'`` keys).

    Raises:
        ValueError: If ``strategy`` is not supported or ``budget`` is below 1.
    """
    # Validate up front: an unknown strategy used to surface only as a
    # NameError on `sel` after a full training round had already been spent.
    if strategy not in ('uncertainty', 'sensitivity'):
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected 'uncertainty' or 'sensitivity'."
        )
    if budget < 1:
        raise ValueError(f"budget must be at least 1, got {budget!r}.")

    # NOTE(review): every strategy is trained with the passive best_cfg.
    # unc_results / sen_results hold strategy-specific tuning results, but
    # their layout is not visible here, so nothing is read from them.
    best_cfg = pas_results[dataset]['best_cfg']
    lr, wd = best_cfg['lr'], best_cfg['wd']
    hidden, bs = best_cfg['hidden'], best_cfg['bs']
    init, query = 20, 10  # initial labeled-set size and points added per round

    # The initial pool, model initialisation and shuffling were unseeded.
    # NOTE(review): this only helps if the nn.* helpers draw from torch's
    # global RNG -- confirm.
    torch.manual_seed(seed)

    X_train_val, X_test, y_train_val, y_test = get_data_splits_regression(dataset)

    # Fit the scaler on training data only, then apply it to the test data.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_val)
    X_test_scaled = scaler.transform(X_test)

    # The full train+val split acts as the pool from which labels are "bought".
    x_pool = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_pool = torch.tensor(y_train_val, dtype=torch.float32).unsqueeze(-1)

    test_dataset = TensorDataset(
        torch.tensor(X_test_scaled, dtype=torch.float32),
        torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1),
    )
    test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False)

    train_config = TrainConfig(learning_rate=lr, weight_decay=wd, batch_size=bs,
                               max_epochs=200, patience=20, device='cpu')

    num_train, input_dim = x_pool.shape[0], x_pool.shape[1]
    max_labels = min(budget, num_train)

    def fit_model(indices):
        """Train a fresh model on the pool rows selected by `indices`."""
        subset = TensorDataset(x_pool[indices], y_pool[indices])
        loader = DataLoader(subset, batch_size=bs, shuffle=True)
        model = OneHiddenMLP(input_dim=input_dim, hidden_units=hidden, output_dim=1)
        # NOTE(review): the test loader is handed to train_passive as its
        # second loader. If that loader drives early stopping (TrainConfig has
        # a `patience`), the test set influences model selection -- confirm.
        train_passive(model, loader, test_loader, nn.MSELoss(), train_config)
        return model

    # Random initial labeled set, never larger than the budget or the pool.
    labeled_indices = torch.randperm(num_train)[:min(init, max_labels)]

    # Everything not labeled yet, in ascending index order (boolean mask
    # instead of a per-element membership test against a Python list).
    pool_mask = torch.ones(num_train, dtype=torch.bool)
    pool_mask[labeled_indices] = False
    unlabeled_indices = torch.nonzero(pool_mask).squeeze(1)

    while labeled_indices.numel() < max_labels and unlabeled_indices.numel() > 0:
        model = fit_model(labeled_indices)

        # Never ask for more points than the budget or the pool still allows.
        n_query = min(
            query,
            max_labels - labeled_indices.numel(),
            unlabeled_indices.numel(),
        )
        candidates = x_pool[unlabeled_indices].to(train_config.device)

        if strategy == 'uncertainty':
            sel = uncertainty_sampling(
                model,
                candidates,
                n_query,
                UncertaintySamplingConfig(mode="regression", method=method),
            )
        else:  # 'sensitivity' (validated above)
            sel = sensitivity_sampling(model, candidates, n_query)

        # `sel` indexes into the candidate batch; map it back to pool indices,
        # then move those points from the unlabeled to the labeled set.
        newly_selected = unlabeled_indices[sel]
        labeled_indices = torch.unique(torch.cat([labeled_indices, newly_selected]))
        keep = torch.ones_like(unlabeled_indices, dtype=torch.bool)
        keep[sel] = False
        unlabeled_indices = unlabeled_indices[keep]

    # Train once more on the final labeled set and report test metrics.
    final_model = fit_model(labeled_indices)
    return evaluate_regression(final_model, test_loader, device='cpu')


Test set evaluation functions for regression defined!


In [None]:
# Test-set metrics, keyed by strategy -> dataset [-> method] [-> str(budget)].
test_results = dict(passive={}, uncertainty={}, sensitivity={})

# Passive baseline: a single run per dataset.
for dataset in DATASETS:
    print(f"Running passive on {dataset}")
    test_results['passive'][dataset] = evaluate_passive_test_regression(dataset, max(BUDGETS))

# Uncertainty-based active learning: one run per dataset, method and budget.
for dataset in DATASETS:
    by_method = {}
    test_results['uncertainty'][dataset] = by_method
    for method in UNCERTAINTY_METHODS:
        print(f"Running uncertainty on {dataset} - {method}")
        by_budget = {}
        by_method[method] = by_budget
        for budget in BUDGETS:
            by_budget[str(budget)] = evaluate_active_test_regression(
                dataset, 'uncertainty', method, budget
            )

# Sensitivity-based active learning: one run per dataset and budget.
for dataset in DATASETS:
    print(f"Running sensitivity on {dataset}...")
    by_budget = {}
    test_results['sensitivity'][dataset] = by_budget
    for budget in BUDGETS:
        by_budget[str(budget)] = evaluate_active_test_regression(
            dataset, 'sensitivity', '', budget
        )

print("Done")

Starting test set evaluation for regression...
This will evaluate all methods on the held-out test sets that were never seen during hyperparameter tuning.

Evaluating passive learning on test sets...
  diabetes...
  linnerud...
  california...

Evaluating uncertainty-based active learning on test sets...
  diabetes - entropy...
  diabetes - margin...
  diabetes - least_confidence...
  linnerud - entropy...
  linnerud - margin...
  linnerud - least_confidence...
  california - entropy...
  california - margin...
  california - least_confidence...

Evaluating sensitivity-based active learning on test sets...
  diabetes...
  linnerud...
  california...

Test set evaluation for regression completed!
Results stored in test_results dictionary.


In [None]:
# One figure per (dataset, metric): every active strategy against the passive baseline.
for dataset in DATASETS:
    for metric in METRICS:
        fig, ax = plt.subplots(figsize=(10, 6))

        # One curve per uncertainty method.
        for method in UNCERTAINTY_METHODS:
            per_budget = test_results['uncertainty'][dataset][method]
            result = [per_budget[str(b)][metric] for b in BUDGETS]
            ax.plot(BUDGETS, result, marker='o', label=f'uncertainty_{method}', linewidth=2)

        # Sensitivity sampling curve.
        per_budget = test_results['sensitivity'][dataset]
        result = [per_budget[str(b)][metric] for b in BUDGETS]
        ax.plot(BUDGETS, result, marker='s', label='sensitivity', linewidth=2)

        # The passive result has no budget axis, so draw it as a horizontal line.
        baseline = test_results['passive'][dataset][metric]
        ax.axhline(baseline, color='k', linestyle='--', label='passive_best', linewidth=2)

        ax.set_xlabel('Labeled budget (max_labels)')
        ax.set_ylabel(metric)
        ax.set_title(f'{dataset}: Strategies Comparison ({metric}) - TEST SET EVALUATION')
        ax.grid(True, alpha=0.3)
        # Legend sits outside the axes, to the right.
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()

        out_path = os.path.join(SAVE_DIR, f'reg_{dataset}_comparison_{metric}_test.png')
        fig.savefig(out_path, dpi=200, bbox_inches='tight')
        # Close the figure so repeated iterations do not accumulate open figures.
        plt.close(fig)

        print(f"Saved {dataset} {metric} comparison (TEST SET)")



=== Comparing strategies for diabetes (TEST SET) ===
Saved diabetes rmse comparison (TEST SET)
Saved diabetes mae comparison (TEST SET)
Saved diabetes r2 comparison (TEST SET)

=== Comparing strategies for linnerud (TEST SET) ===
Saved linnerud rmse comparison (TEST SET)
Saved linnerud mae comparison (TEST SET)
Saved linnerud r2 comparison (TEST SET)

=== Comparing strategies for california (TEST SET) ===
Saved california rmse comparison (TEST SET)
Saved california mae comparison (TEST SET)
Saved california r2 comparison (TEST SET)


In [None]:
# Active-learning results are stored under the budget converted to a string.
max_budget = str(max(BUDGETS))

summary_data = []
for dataset in DATASETS:
    # (method label, metrics) pairs in display order:
    # passive first, then each uncertainty method, then sensitivity.
    labelled_scores = [('passive', test_results['passive'][dataset])]
    labelled_scores.extend(
        (f'uncertainty_{name}', test_results['uncertainty'][dataset][name][max_budget])
        for name in UNCERTAINTY_METHODS
    )
    labelled_scores.append(('sensitivity', test_results['sensitivity'][dataset][max_budget]))

    for label, scores in labelled_scores:
        summary_data.append({
            'dataset': dataset,
            'method': label,
            'budget': max(BUDGETS),
            'rmse': scores['rmse'],
            'mae': scores['mae'],
            'r2': scores['r2'],
        })

# One row per (dataset, method); rounded only for display, saved at full precision.
df = pd.DataFrame(summary_data)
print(df.round(4))

df.to_csv(os.path.join(SAVE_DIR, 'reg_comparison_summary_test.csv'), index=False)


=== Summary Table (TEST SET EVALUATION) ===
       dataset                        method  budget     rmse      mae      r2
0     diabetes                       passive     200  51.6864  40.6610  0.4958
1     diabetes           uncertainty_entropy     200  54.5341  43.4015  0.4387
2     diabetes            uncertainty_margin     200  55.9546  45.1058  0.4091
3     diabetes  uncertainty_least_confidence     200  53.7882  43.2867  0.4539
4     diabetes                   sensitivity     200  52.8503  42.2713  0.4728
5     linnerud                       passive     200  55.3287  46.7521 -9.8869
6     linnerud           uncertainty_entropy     200  55.3851  42.8853 -9.9091
7     linnerud            uncertainty_margin     200  44.4944  36.0868 -6.0407
8     linnerud  uncertainty_least_confidence     200  49.9238  40.5934 -7.8638
9     linnerud                   sensitivity     200  50.3240  40.4513 -8.0065
10  california                       passive     200   0.5317   0.3603  0.7842
11  cal