# RUN EXPERIMENTS

In [None]:
from modeling_pipeline_utils import load_feature_csv, run_experiments

# Paths to the training and test feature CSV files. Both are intentionally
# left empty: fill them in before running this cell, for example with files
# located in your current working directory.
train_csv = r""
test_csv = r""

# Alternatively, provide the full path to each file (raw strings keep the
# Windows backslashes literal):
# train_csv = r"C:\correct\path\to\train_merged_LE_RE.csv"
# test_csv = r"C:\correct\path\to\test_merged_LE_RE.csv"

# Load both CSVs; the helper's return value is unpacked into four variables.
# NOTE(review): presumably features/labels for the train and test splits --
# confirm against modeling_pipeline_utils.load_feature_csv.
X_train, y_train, X_test, y_test = load_feature_csv(train_csv, test_csv)

# Run the experiments. `results` and `metrics_table` are consumed by the
# cells further down in this notebook.
results, metrics_table, jaccard = run_experiments(
    X_train, y_train, X_test, y_test,
    top_n=10,
    nsamples_ci=2000  # for a "fast run", change this to 200
)

# Results table

In [None]:
# Render the metrics table returned by run_experiments; the experiments cell
# must have been run first so that `metrics_table` exists.
# NOTE(review): `display` is not imported in this file -- assumed to be the
# function that IPython/Jupyter makes available in notebook sessions.
display(metrics_table)

# Sanity checks

In [None]:

# Sanity check: how many experiment results were returned.
# Expected counts:
#   4-8   -> sanity run
#   10-20 -> full experiment set
len(results)

# Information per model
- Change the index in `results[0]` to inspect a different model.

In [None]:
# Inspect one experiment result by reading its fields.
# NOTE(review): the original note calls these "dataclass fields", so `res` is
# presumably a dataclass instance -- confirm in modeling_pipeline_utils.
res = results[0]  # change the index here to view a different model
print("Model:", res.model_name)
print("Selector:", res.selector_name)
print("Optimal threshold:", res.optimal_threshold)

# One feature per line; str.join requires every entry to be a string.
print("\nSelected features:")
print("\n".join(res.selected_features))

# The metrics object is rendered with display() rather than print().
print("\nMetrics:")
display(res.metrics_test)


# Confusion matrices

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Draw one confusion-matrix figure per experiment result.
for res in results:
    # A fresh 4x4-inch figure per result keeps the plots separate.
    fig, ax = plt.subplots(figsize=(4, 4))

    # Build the display object first, then render it onto the prepared axes.
    # NOTE(review): `confusion_matrix_norm` is presumably a normalized matrix
    # (hence the two-decimal cell format) -- confirm where it is computed.
    cm_display = ConfusionMatrixDisplay(
        confusion_matrix=res.confusion_matrix_norm,
        display_labels=[0, 1],
    )
    cm_display.plot(ax=ax, values_format=".2f")

    # Title identifies the model / feature-selector combination.
    ax.set_title(f"{res.model_name} | {res.selector_name}")

    # `fig` is the current figure here, so this matches plt.tight_layout().
    fig.tight_layout()
    plt.show()
