# AIMS-ML Final Report Notebook
This notebook summarizes in-distribution performance, out-of-distribution (OOD) material generalization, and feature importance.

In [None]:
import pandas as pd
from aims_ml.ml import run_full_ml_diagnostics
from aims_ml.visualization import plot_model_comparison


In [None]:
# Read the simulation-run table from the repo-relative CSV; the trailing
# expression renders the (rows, columns) tuple as the cell output.
df = pd.read_csv(filepath_or_buffer='data/sim_runs.csv')
df.shape


In [None]:
# Run the diagnostics on the loaded runs and pull out the three results that
# the later cells consume.
# NOTE(review): artifacts_dir is presumably where the pipeline writes its files — confirm.
outputs = run_full_ml_diagnostics(df, artifacts_dir='artifacts')
metrics, ood_metrics, feature_importance = (
    outputs[key] for key in ('metrics', 'ood_metrics', 'feature_importance')
)
metrics  # trailing expression: rendered as the cell output


In [None]:
# Produce the model-comparison plot (out_path is presumably the saved figure
# location — confirm), then show the metrics ordered by target and, within
# each target, by ascending RMSE with a fresh 0..n-1 index.
plot_model_comparison(metrics, out_path='artifacts/model_comparison_rmse.png')
(
    metrics
    .sort_values(by=['target', 'rmse'])
    .reset_index(drop=True)
)


## OOD Evaluation (Hold Out One Material)

In [None]:
# Show the hold-out results ordered by held-out material and ascending RMSE;
# when there are no rows, print a hint instead of an empty table.
if len(ood_metrics) > 0:
    display(
        ood_metrics
        .sort_values(by=['holdout_material', 'rmse'])
        .reset_index(drop=True)
    )
else:
    print('OOD table is empty. Generate a larger dataset with all materials.')


## Feature Importance (Permutation, Random Forest)

In [None]:
# Show up to 10 rows per target. head(10) keeps the first 10 rows of each
# group in their existing order, so this is a "top 10" only if the table
# arrives already sorted by importance — TODO confirm against the producer.
display(
    feature_importance
    .groupby('target')
    .head(10)
    .reset_index(drop=True)
)


## Discussion prompts
- How much does performance drop in OOD material holdouts?
- Do uncertainty estimates grow in OOD settings?
- Which features dominate each target, and is this physically plausible?
- What simulator assumptions likely bias ML behavior?