# Notebook 4: Test-Harness for Your Best Model

This notebook loads your final model artifact, grabs one sample from your held-out test set, and lets you play with its feature values to see how the predicted probability (and class) change.

In [None]:
# Papermill parameter cell: defaults used by the cells below
SAMPLE_INDEX = 0  # position of the test-set row to grab by default
DATA_PROCESSED = "data/processed"  # folder where 3_modeling saved historical_proc.csv
MODEL_DIR = "src/models"  # folder where 3_modeling saved final_<best>.pkl

In [None]:
# Cell 3 – Load model, features & build a “sample” input
#
# Rebuilds the date-ordered hold-out split, loads the persisted model, removes
# the non-feature columns and extracts the single row selected by SAMPLE_INDEX.

import os, joblib, pandas as pd

# 1) Read in the full processed DataFrame (same split logic as Notebook 3)
df = pd.read_csv(os.path.join(DATA_PROCESSED, "historical_proc.csv"), parse_dates=["date"])
df.sort_values("date", inplace=True)
split = int(len(df) * 0.75)          # rows from this position on (latest 25 % by date) are the test set
test_df = df.iloc[split:].copy()

# 2) Load the trained model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts produced by your own pipeline.
model_path = os.path.join(MODEL_DIR, "final_best_model.pkl")  # adjust filename if different
best_model = joblib.load(model_path)

# 3) Drop only the columns that actually exist
#    (the date column plus next-period / target columns are not model inputs)
to_drop = ["date","close_next","high_next","low_next","target_up","target_high","target_low"]
drop_cols = [c for c in to_drop if c in test_df.columns]
sample_df = test_df.drop(columns=drop_cols)

# 4) Pick one sample row to sweep / inspect.
#    SAMPLE_INDEX is defined in the parameters cell; it is deliberately NOT
#    reassigned here, otherwise a value injected by Papermill (or edited in the
#    parameters cell) would be silently overwritten with 0.
#    The double brackets keep the result a one-row DataFrame rather than a Series.
input_sample = sample_df.iloc[[SAMPLE_INDEX]].reset_index(drop=True)

print(f"▶ Using one test sample (test-set row {SAMPLE_INDEX}) with features:")
display(input_sample.head())

### Optional testing features

# ▶ UNCOMMENT any line below to test “what-if” scenarios
# e.g. simulate a huge spike in standardized price:
# sample_df["price_std"] = 1.5
# sample_df["PC1"]        =  0.0
# sample_df["percent_change_24h_mm"] = 0.75

### Scoring the modified sample

In [None]:
# Score only the row selected by SAMPLE_INDEX instead of running inference on
# the whole test set and reading row 0. The row is sliced from sample_df (not
# from the earlier input_sample copy) so that any "what-if" overrides written
# to sample_df are reflected in the prediction.
scored_row = sample_df.iloc[[SAMPLE_INDEX]]   # double brackets -> one-row DataFrame

# Column 1 of predict_proba is taken as the "up" class probability
# (presumably classes are [0, 1] — verify against best_model.classes_).
proba = best_model.predict_proba(scored_row)[0, 1]
cls   = best_model.predict(scored_row)[0]

print(f"→ Predicted probability of up-day: {proba:.3f}")
print(f"→ Predicted class (0=down,1=up): {cls}")
