# 📊 Notebook 4: Test-Harness for Your Best Model

This notebook loads your final model artifact, grabs one sample from your held-out test set, and lets you edit its feature values to see how the predicted probability (and class) changes.

In [None]:
# Papermill parameters: the defaults below are used unless overridden at run time.

# Directory where 3_modeling saved historical_proc.csv
DATA_PROCESSED = "data/processed"

# Directory where 3_modeling saved final_<best>.pkl
MODEL_DIR = "src/models"

# Which test-set row to grab by default
SAMPLE_INDEX = 0

In [None]:
import os, joblib
import pandas as pd

# 1a) Load processed features
# "date" is parsed as datetime on read; it is dropped again before scoring (step 1d).
df = pd.read_csv(
    os.path.join(DATA_PROCESSED, "historical_proc.csv"),
    parse_dates=["date"]
)

# 1b) Re-create train/test split (75/25 as in notebook 3)
# Positional split: the first 75% of rows are treated as training data and the
# remaining rows become the held-out test set, re-indexed from 0.
split = int(len(df)*0.75)
test_df = df.iloc[split:].reset_index(drop=True)

# 1c) Load best model
# os.listdir() returns entries in arbitrary order, so taking "the first match"
# is non-deterministic when several candidates exist. Enforce the documented
# invariant (exactly one final_*.pkl) and fail with an actionable message.
model_files = os.listdir(MODEL_DIR)
final_models = sorted(
    f for f in model_files
    if f.startswith("final_") and f.endswith(".pkl")
)
if not final_models:
    raise FileNotFoundError(
        f"No final_*.pkl model artifact found in {MODEL_DIR!r}"
    )
if len(final_models) > 1:
    raise RuntimeError(
        f"Expected exactly one final_*.pkl in {MODEL_DIR!r}, found: {final_models}"
    )
model_path = os.path.join(MODEL_DIR, final_models[0])
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model artifacts that you produced or otherwise trust.
best_model = joblib.load(model_path)

# 1d) Grab one sample as DataFrame
# Drop the date and every next-period/target column so only model inputs remain.
sample = test_df.drop(columns=["date","close_next","high_next","low_next","target_up","target_high","target_low"])

# Validate the requested row up front; negative values index from the end,
# matching .iloc semantics.
n_test_rows = len(sample)
if not -n_test_rows <= SAMPLE_INDEX < n_test_rows:
    raise IndexError(
        f"SAMPLE_INDEX={SAMPLE_INDEX} is out of range for a test set of {n_test_rows} rows"
    )

# The double brackets keep the result a one-row DataFrame (not a Series);
# .copy() lets later cells edit the sample without touching test_df.
sample_df = sample.iloc[[SAMPLE_INDEX]].copy()
print("Template sample (first few cols):")
display(sample_df.iloc[:,:5])

### Optional testing features

# ▶ UNCOMMENT any line below to test “what-if” scenarios, then run the scoring cell that follows
# e.g. simulate a huge spike in standardized price:
# sample_df["price_std"] = 1.5
# sample_df["PC1"]        =  0.0
# sample_df["percent_change_24h_mm"] = 0.75

### Scoring the modified sample

In [None]:
# Score the one-row sample with the loaded model.
# Row 0 is the only sample; column 1 is reported as the up-day probability
# (assumes the model's classes are ordered [0, 1] — TODO confirm via best_model.classes_).
class_probabilities = best_model.predict_proba(sample_df)
proba = class_probabilities[0, 1]

# Hard class label for the same sample.
predicted_labels = best_model.predict(sample_df)
cls = predicted_labels[0]

print(f"→ Predicted probability of up-day: {proba:.3f}")
print(f"→ Predicted class (0=down,1=up): {cls}")
