In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from datetime import timedelta

# ------------------ Helpers ------------------

def flatten_df(df):
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = ['_'.join([str(c) for c in col if c]).strip() for col in df.columns.values]
    df.reset_index(inplace=True)
    return df

def fetch_stock_data(ticker, years=15):
    df = yf.download(ticker, period=f"{years}y")
    if df.empty:
        return pd.DataFrame(columns=["date", "open", "high", "low", "close", "adj_close", "volume"])
    df = flatten_df(df)
    df.rename(columns={
        "Date": "date", "Open": "open", "High": "high",
        "Low": "low", "Close": "close", "Adj Close": "adj_close",
        "Volume": "volume"
    }, inplace=True)
    df['date'] = pd.to_datetime(df['date']).dt.date
    #print(df.head())
    return df

def fetch_benchmark(ticker, col_name, years=15):
    df = yf.download(ticker, period=f"{years}y")[["Close"]]
   # print(df.head(),col_name)
    if df.empty:
        return pd.DataFrame(columns=["date", col_name])
    df = flatten_df(df)
    df.rename(columns={"Date": "date", "Close_"+ticker: col_name}, inplace=True)
    #print(df.head())
    df['date'] = pd.to_datetime(df['date']).dt.date

    return df[["date", col_name]]

def fetch_benchmarks(years=15):
    spx = fetch_benchmark("^GSPC", "spx_close", years)
    ndx = fetch_benchmark("^NDX", "ndx_close", years)
    vix = fetch_benchmark("^VIX", "vix_close", years)
    return spx, ndx, vix

def fetch_10y_yield(years=15):
    df = yf.download("^TNX", period=f"{years}y")
    if df.empty:
        return pd.DataFrame(columns=["date", "dgs10_yield"])
    df = flatten_df(df)

    df.rename(columns={"Date": "date", "Close_^TNX": "dgs10_yield"}, inplace=True)
    df['date'] = pd.to_datetime(df['date']).dt.date
    df["dgs10_yield"] = df["dgs10_yield"] / 10.0
    return df[["date", "dgs10_yield"]]

def build_feature_frame(stock_df, spx_df, ndx_df, vix_df, yield_df, macro_path=None):
    df = stock_df.copy()
    df = df.merge(spx_df, on="date", how="left")
    df = df.merge(ndx_df, on="date", how="left")
    df = df.merge(vix_df, on="date", how="left")
    df = df.merge(yield_df, on="date", how="left")
    if macro_path:
        macro = pd.read_csv(macro_path)
        macro['date'] = pd.to_datetime(macro['date']).dt.date
        macro.drop_duplicates(subset=['date'], inplace=True)
        df = df.merge(macro, on="date", how="left")
    df.fillna(method="ffill", inplace=True)
    return df

# ------------------ Fetch Data ------------------

stock_df = fetch_stock_data("QQQ", years=15)
spx_df, ndx_df, vix_df = fetch_benchmarks(years=15)
yield_df = fetch_10y_yield(years=15)

df = build_feature_frame(stock_df, spx_df, ndx_df, vix_df, yield_df, macro_path=None)
df = df.sort_values("date")

# ------------------ Train/Test Split ------------------
df.rename(columns={"Open_QQQ": "Open", "High_QQQ": "High","Volume_QQQ":"Volume","Close_QQQ":"close","Low_QQQ":"low"}, inplace=True)
print(df.tail())
split_date = df['date'].iloc[0].replace(year=df['date'].iloc[0].year + 10)
train_df = df[df['date'] < split_date]
test_df = df[df['date'] >= split_date]

feature_cols = ["Open", "High", "Volume", "spx_close", "ndx_close", "vix_close", "dgs10_yield"]

# Close price model
model_close = RandomForestRegressor(n_estimators=200, random_state=42)
model_close.fit(train_df[feature_cols], train_df["close"])
pred_close = model_close.predict(test_df[feature_cols])

# Low price model
model_low = RandomForestRegressor(n_estimators=200, random_state=42)
model_low.fit(train_df[feature_cols], train_df["low"])
pred_low = model_low.predict(test_df[feature_cols])

# ------------------ Backtest Results ------------------

mae_close = mean_absolute_error(test_df["close"], pred_close)
r2_close = r2_score(test_df["close"], pred_close)
mae_low = mean_absolute_error(test_df["low"], pred_low)
r2_low = r2_score(test_df["low"], pred_low)

print(f"Close Price Prediction - MAE: {mae_close:.2f}, R²: {r2_close:.4f}")
print(f"Low Price Prediction   - MAE: {mae_low:.2f}, R²: {r2_low:.4f}")

# ------------------ Train Full Model for Tomorrow ------------------

model_close.fit(df[feature_cols], df["close"])
model_low.fit(df[feature_cols], df["low"])

last_row = df.iloc[-1][feature_cols].values.reshape(1, -1)
pred_tomorrow_close = model_close.predict(last_row)[0]
pred_tomorrow_low = model_low.predict(last_row)[0]

tomorrow_date = pd.to_datetime(df['date'].iloc[-1]) + timedelta(days=1)

print("\n---- Tomorrow's Forecast ----")
print(f"Date: {tomorrow_date.date()}")
print(f"Predicted Close: {pred_tomorrow_close:.2f}")
print(f"Predicted Low:   {pred_tomorrow_low:.2f}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)


            date       close        High         low        Open    Volume  \
3769  2025-12-19  616.255981  616.825255  611.082654  611.162568  60369800   
3770  2025-12-22  619.210022  621.650024  617.770020  621.349976  43703100   
3771  2025-12-23  622.109985  622.409973  617.780029  618.200012  41120400   
3772  2025-12-24  623.929993  624.280029  621.719971  621.989990  18468700   
3773  2025-12-26  623.890015  625.520020  623.140015  624.659973  28932100   

        spx_close     ndx_close  vix_close  dgs10_yield  
3769  6834.500000  25346.179688      14.91       0.4151  
3770  6878.490234  25461.699219      14.08       0.4169  
3771  6909.790039  25587.830078      14.00       0.4169  
3772  6932.049805  25656.150391      13.47       0.4136  
3773  6929.939941  25644.390625      13.60       0.4136  
Close Price Prediction - MAE: 100.90, R²: -0.9841
Low Price Prediction   - MAE: 100.10, R²: -0.9694

---- Tomorrow's Forecast ----
Date: 2025-12-27
Predicted Close: 623.94
Predicted L

