In [None]:
# Core imports for the notebook: date handling, filesystem paths and pandas.
import datetime as dt
from pathlib import Path
import pandas as pd
# Marker in the cell output showing the imports above completed.
print("done")

In [None]:
import logging
import warnings

# Suppress these warning categories for the rest of the session
# (registered in the same order as before: UserWarning, then FutureWarning).
for _category in (UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)

# Raise the threshold of the named third-party loggers.
for _logger_name, _level in (
    ("lightgbm", logging.ERROR),
    ("optuna", logging.WARNING),
):
    logging.getLogger(_logger_name).setLevel(_level)

print("done")

In [None]:
# Project helpers used by the cells below: price loading, feature computation
# and training-data preparation, plus the grid-search and back-test entry points.
from model.features import (
    download_or_load_prices,
    compute_features,
    data_prep_and_feature_engineering,
)
from model.grid_search import run_grid_search
from model.backtest import run_backtest

# NOTE: this cell only performs imports; nothing is downloaded here despite
# the wording of the message.
print("downloaded!")

In [None]:
# Date window shared by every price request in this notebook.
start = dt.date(2015, 1, 1)
end = dt.date(2024, 7, 1)


def _load_close(symbol, cache_file):
    """Call download_or_load_prices for one symbol and return its "Close" data renamed to the symbol."""
    prices = download_or_load_prices([symbol], Path(cache_file), start, end)
    return prices["Close"].rename(symbol)


# SPY and ^VIX close prices, each backed by its own parquet cache file.
spy = _load_close("SPY", "spy_cache.parquet")
vix = _load_close("^VIX", "vix_cache.parquet")

# Place the two results side by side in a single frame.
market = pd.concat([spy, vix], axis=1)

print("market columns ->", market.columns.tolist())   # should be ['SPY', '^VIX']



In [None]:
# Ticker under study, named once and reused for the download and the prep helper.
TICKER = "AAPL"

# Step 1: raw prices for the ticker.
df_prices = download_or_load_prices([TICKER], Path("price_cache.parquet"), start, end)

# Step 2: feature frame (includes a "Target" column) computed against `market`.
df_feat = compute_features(df_prices, market)

# Step 3: arguments for the preparation helper.
tickers = [TICKER]
feature_list = [name for name in df_feat.columns if name != "Target"]  # every column except the target

# Step 4: training data; the helper's result is unpacked as (X_train, y_train).
X_train, y_train = data_prep_and_feature_engineering(
    tickers, feature_list, Path("feature_cache.parquet"), start, end
)

print("X_train shape:", X_train.shape, "| y_train shape:", y_train.shape)


In [None]:
# Hyper-parameter search on the training data; the result is unpacked as
# (best_params, best_model).
best_params, best_model = run_grid_search(X_train, y_train)
print("best params:", best_params)

# Simple back-test (use your own test split or walk-forward logic if you prefer).
# NOTE(review): this scores the model on the same X_train / y_train that were
# passed to the search, i.e. the evaluation is in-sample.
# NOTE(review): a later cell calls run_backtest with a single Path argument;
# confirm which call shape model.backtest.run_backtest currently accepts.
results = run_backtest(best_model, X_train, y_train)
results.head()


In [None]:
# Visual separator in the notebook output; has no effect on any variable.
print("---------------------------------------")

In [None]:
# Imports needed by this cell on top of the earlier cells.
import warnings

import joblib
# RandomForestClassifier was used below without being imported anywhere in the
# notebook, which raised NameError on a fresh kernel.
from sklearn.ensemble import RandomForestClassifier

# Keep the output quiet even when this cell is run on its own.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# 1) run the grid search
best_params, best_model = run_grid_search(X_train, y_train)
print("✅ best params:", best_params)

# 2) fit the RandomForest with only the RF-compatible params; any other keys
#    returned by the search are dropped before being passed as kwargs.
RF_PARAM_NAMES = {
    "n_estimators",
    "max_depth",
    "max_features",
    "min_samples_split",
    "min_samples_leaf",
}
rf_params = {k: v for k, v in best_params.items() if k in RF_PARAM_NAMES}

# This replaces the model object returned by the grid search above.
best_model = RandomForestClassifier(
    **rf_params,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)
print("🔥  model fitted")

# 3) build bt_data exactly as before  (df_feat, idx_dates, proba …)
# … your existing code that constructs `bt_data` …
# TODO: nothing visible in this notebook defines `bt_data`; fail here with a
# clear message instead of an opaque NameError while building the artefact.
if "bt_data" not in globals():
    raise NameError(
        "bt_data is not defined: construct it in step 3 before saving the artefact"
    )

# 4) save the artefact as a dict with "model", "features" and "bt_data" keys
#    (per the original note, this is the layout backtest.py expects)
art_path = Path("best_model.joblib")
joblib.dump(
    {
        "model": best_model,
        "features": X_train.columns.tolist(),
        "bt_data": bt_data,
    },
    art_path,
)
print("📦 artefact saved to", art_path)

# 5) run the back-test, passing only the artefact path
results = run_backtest(art_path)
results.head()
