In [None]:
from pathlib import Path

import numpy as np

from dapinet.analysis.real_world_data import load_datasets, run_model_inference

In [2]:
data_path = Path("datasets/real_world")
model_path = Path("models/DAPINet")

datasets = load_datasets(data_path)
if not datasets:
    print("No datasets loaded. Exiting.")

print(f"Loaded {len(datasets)} datasets.")

2026-01-28 10:38:13,925 INFO: Loading datasets from datasets\real_world
Loading Datasets: 100%|██████████| 20/20 [00:00<00:00, 923.19it/s]
2026-01-28 10:38:13,951 INFO: Loaded 20 datasets.


Loaded 20 datasets.


In [3]:
df_pred, stats = run_model_inference(model_path, datasets)

# Save comprehensive results
output_path = Path("results")
output_path.mkdir(parents=True, exist_ok=True)
output_file = output_path / "DAPINet_results.csv"
df_pred.to_csv(output_file, index=False)

# Save per-dataset inference time
if "inference_time_ms" in df_pred.columns:
    time_output = output_path / "DAPINet_inference_time.csv"
    df_pred[["dataset", "inference_time_ms"]].to_csv(time_output, index=False)
    print(f"Saved per-dataset inference time to: {time_output}")

print(f"Saved detailed inference results to: {output_file}")

2026-01-28 10:38:15,810 INFO: Found 5 models in models\DAPINet
2026-01-28 10:38:15,930 INFO: Updating Config from checkpoint_fold_1.pth...
2026-01-28 10:38:15,946 INFO: Loaded model from models\DAPINet\checkpoint_fold_1.pth (Epoch 4, Loss 0.0456)
2026-01-28 10:38:15,983 INFO: Updating Config from checkpoint_fold_2.pth...
2026-01-28 10:38:15,998 INFO: Loaded model from models\DAPINet\checkpoint_fold_2.pth (Epoch 8, Loss 0.0461)
2026-01-28 10:38:16,038 INFO: Updating Config from checkpoint_fold_3.pth...
2026-01-28 10:38:16,052 INFO: Loaded model from models\DAPINet\checkpoint_fold_3.pth (Epoch 9, Loss 0.0513)
2026-01-28 10:38:16,095 INFO: Updating Config from checkpoint_fold_4.pth...
2026-01-28 10:38:16,109 INFO: Loaded model from models\DAPINet\checkpoint_fold_4.pth (Epoch 7, Loss 0.0482)
2026-01-28 10:38:16,158 INFO: Updating Config from checkpoint_fold_5.pth...
2026-01-28 10:38:16,178 INFO: Loaded model from models\DAPINet\checkpoint_fold_5.pth (Epoch 8, Loss 0.0447)
Model Inference: 

Saved per-dataset inference time to: results\DAPINet_inference_time.csv
Saved detailed inference results to: results\DAPINet_results.csv


In [4]:
# Test permutation invariance by shuffling feature columns and re-running inference
rng = np.random.default_rng(seed=42)
permuted_datasets: dict[str, dict] = {}
for name, ds in datasets.items():
    X = ds["X"]
    perm = rng.permutation(X.shape[1])
    permuted_entry = {"X": X[:, perm].copy()}
    for optional_key in ("y", "ari"):
        if optional_key in ds:
            permuted_entry[optional_key] = ds[optional_key]
    permuted_datasets[name] = permuted_entry

# Run inference on permuted columns
permuted_pred, perm_stats = run_model_inference(model_path, permuted_datasets)

algo_cols = [
    c
    for c in df_pred.columns
    if c not in ("dataset", "inference_time_ms", "n_rows", "n_cols")
    and c in permuted_pred.columns
    and np.issubdtype(df_pred[c].dtype, np.number)
 ]
diff = df_pred.set_index("dataset")[algo_cols].subtract(
    permuted_pred.set_index("dataset")[algo_cols], fill_value=np.nan
)
max_abs_diff = diff.abs().max(axis=1)

print("\nPermutation test:")
print(f"  Max abs diff per dataset (predictions): {max_abs_diff.describe().to_dict()}")

2026-01-28 10:38:27,128 INFO: Found 5 models in models\DAPINet
2026-01-28 10:38:27,202 INFO: Updating Config from checkpoint_fold_1.pth...
2026-01-28 10:38:27,221 INFO: Loaded model from models\DAPINet\checkpoint_fold_1.pth (Epoch 4, Loss 0.0456)
2026-01-28 10:38:27,265 INFO: Updating Config from checkpoint_fold_2.pth...
2026-01-28 10:38:27,279 INFO: Loaded model from models\DAPINet\checkpoint_fold_2.pth (Epoch 8, Loss 0.0461)
2026-01-28 10:38:27,330 INFO: Updating Config from checkpoint_fold_3.pth...
2026-01-28 10:38:27,345 INFO: Loaded model from models\DAPINet\checkpoint_fold_3.pth (Epoch 9, Loss 0.0513)
2026-01-28 10:38:27,391 INFO: Updating Config from checkpoint_fold_4.pth...
2026-01-28 10:38:27,408 INFO: Loaded model from models\DAPINet\checkpoint_fold_4.pth (Epoch 7, Loss 0.0482)
2026-01-28 10:38:27,447 INFO: Updating Config from checkpoint_fold_5.pth...
2026-01-28 10:38:27,460 INFO: Loaded model from models\DAPINet\checkpoint_fold_5.pth (Epoch 8, Loss 0.0447)
Model Inference: 


Permutation test:
  Max abs diff per dataset (predictions): {'count': 20.0, 'mean': 6.705522537231445e-08, 'std': 2.3929925815670675e-08, 'min': 2.9802322387695312e-08, '25%': 5.960464477539063e-08, '50%': 5.960464477539063e-08, '75%': 5.960464477539063e-08, 'max': 1.1920928955078125e-07}





In [5]:
# create dataframe with selected algorithm and its value per dataset
# include per-dataset inference time for convenience
df_out = df_pred[["dataset"]].copy()
if "inference_time_ms" in df_pred.columns:
    df_out["inference_time_ms"] = df_pred["inference_time_ms"]

algo_cols = df_pred.drop(
    columns=[col for col in ["dataset", "inference_time_ms"] if col in df_pred.columns]
)
df_out["selected_algorithm"] = algo_cols.idxmax(axis=1)
df_out["value"] = algo_cols.max(axis=1)

# save results
output_file_selected = output_path / "DAPINet_results.csv"
df_out.to_csv(output_file_selected, index=False)

print(f"Saved selected algorithms to: {output_file_selected}")

Saved selected algorithms to: results\DAPINet_results.csv
