# This is a sample Jupyter Notebook

Below is an example of a code cell.
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from datetime import timedelta

# ------------------ Helpers ------------------

def flatten_df(df):
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = ['_'.join([str(c) for c in col if c]).strip() for col in df.columns.values]
    df.reset_index(inplace=True)
    return df

def fetch_stock_data(ticker, years=15):
    df = yf.download(ticker, period=f"{years}y")
    if df.empty:
        return pd.DataFrame(columns=["date", "open", "high", "low", "close", "adj_close", "volume"])
    df = flatten_df(df)
    df.rename(columns={
        "Date": "date", "Open": "open", "High": "high",
        "Low": "low", "Close": "close", "Adj Close": "adj_close",
        "Volume": "volume"
    }, inplace=True)
    df['date'] = pd.to_datetime(df['date']).dt.date
    #print(df.head())
    return df

def fetch_benchmark(ticker, col_name, years=15):
    df = yf.download(ticker, period=f"{years}y")[["Close"]]
   # print(df.head(),col_name)
    if df.empty:
        return pd.DataFrame(columns=["date", col_name])
    df = flatten_df(df)
    df.rename(columns={"Date": "date", "Close_"+ticker: col_name}, inplace=True)
    #print(df.head())
    df['date'] = pd.to_datetime(df['date']).dt.date

    return df[["date", col_name]]

def fetch_benchmarks(years=15):
    spx = fetch_benchmark("^GSPC", "spx_close", years)
    ndx = fetch_benchmark("^NDX", "ndx_close", years)
    vix = fetch_benchmark("^VIX", "vix_close", years)
    return spx, ndx, vix

def fetch_10y_yield(years=15):
    df = yf.download("^TNX", period=f"{years}y")
    if df.empty:
        return pd.DataFrame(columns=["date", "dgs10_yield"])
    df = flatten_df(df)

    df.rename(columns={"Date": "date", "Close_^TNX": "dgs10_yield"}, inplace=True)
    df['date'] = pd.to_datetime(df['date']).dt.date
    df["dgs10_yield"] = df["dgs10_yield"] / 10.0
    return df[["date", "dgs10_yield"]]

def build_feature_frame(stock_df, spx_df, ndx_df, vix_df, yield_df, macro_path=None):
    df = stock_df.copy()
    df = df.merge(spx_df, on="date", how="left")
    df = df.merge(ndx_df, on="date", how="left")
    df = df.merge(vix_df, on="date", how="left")
    df = df.merge(yield_df, on="date", how="left")
    if macro_path:
        macro = pd.read_csv(macro_path)
        macro['date'] = pd.to_datetime(macro['date']).dt.date
        macro.drop_duplicates(subset=['date'], inplace=True)
        df = df.merge(macro, on="date", how="left")
    df.fillna(method="ffill", inplace=True)
    return df

# ------------------ Fetch Data ------------------

stock_df = fetch_stock_data("QQQ", years=15)
spx_df, ndx_df, vix_df = fetch_benchmarks(years=15)
yield_df = fetch_10y_yield(years=15)

df = build_feature_frame(stock_df, spx_df, ndx_df, vix_df, yield_df, macro_path=None)
df = df.sort_values("date")

# ------------------ Train/Test Split ------------------
df.rename(columns={"Open_QQQ": "Open", "High_QQQ": "High","Volume_QQQ":"Volume","Close_QQQ":"close","Low_QQQ":"low"}, inplace=True)
print(df.tail())
split_date = df['date'].iloc[0].replace(year=df['date'].iloc[0].year + 10)
train_df = df[df['date'] < split_date]
test_df = df[df['date'] >= split_date]

feature_cols = ["Open", "High", "Volume", "spx_close", "ndx_close", "vix_close", "dgs10_yield"]

# Close price model
model_close = RandomForestRegressor(n_estimators=200, random_state=42)
model_close.fit(train_df[feature_cols], train_df["close"])
pred_close = model_close.predict(test_df[feature_cols])

# Low price model
model_low = RandomForestRegressor(n_estimators=200, random_state=42)
model_low.fit(train_df[feature_cols], train_df["low"])
pred_low = model_low.predict(test_df[feature_cols])

# ------------------ Backtest Results ------------------

mae_close = mean_absolute_error(test_df["close"], pred_close)
r2_close = r2_score(test_df["close"], pred_close)
mae_low = mean_absolute_error(test_df["low"], pred_low)
r2_low = r2_score(test_df["low"], pred_low)

print(f"Close Price Prediction - MAE: {mae_close:.2f}, R²: {r2_close:.4f}")
print(f"Low Price Prediction   - MAE: {mae_low:.2f}, R²: {r2_low:.4f}")

# ------------------ Train Full Model for Tomorrow ------------------

model_close.fit(df[feature_cols], df["close"])
model_low.fit(df[feature_cols], df["low"])

last_row = df.iloc[-1][feature_cols].values.reshape(1, -1)
pred_tomorrow_close = model_close.predict(last_row)[0]
pred_tomorrow_low = model_low.predict(last_row)[0]

tomorrow_date = pd.to_datetime(df['date'].iloc[-1]) + timedelta(days=1)

print("\n---- Tomorrow's Forecast ----")
print(f"Date: {tomorrow_date.date()}")
print(f"Predicted Close: {pred_tomorrow_close:.2f}")
print(f"Predicted Low:   {pred_tomorrow_low:.2f}")


  df = yf.download(ticker, period=f"{years}y")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period=f"{years}y")


         date  Close_QQQ   High_QQQ    Low_QQQ   Open_QQQ  Volume_QQQ
0  2010-10-04  42.493675  42.993291  42.248251  42.835517    71359900
1  2010-10-05  43.527950  43.615601  42.949448  42.975742    99301300
2  2010-10-06  43.151073  43.571803  42.870587  43.475385    81831200
3  2010-10-07  43.308838  43.422786  42.949465  43.396489    75693300
4  2010-10-08  43.606850  43.712031  43.019583  43.326364    83223900


[*********************100%***********************]  1 of 1 completed


         date  Close_^GSPC   High_^GSPC    Low_^GSPC   Open_^GSPC  \
0  2010-10-04  1137.030029  1148.160034  1131.869995  1144.959961   
1  2010-10-05  1160.750000  1162.760010  1140.680054  1140.680054   
2  2010-10-06  1159.969971  1162.329956  1154.849976  1159.810059   
3  2010-10-07  1158.060059  1163.869995  1151.410034  1161.569946   
4  2010-10-08  1165.150024  1167.729980  1155.579956  1158.359985   

   Volume_^GSPC  
0    3604110000  
1    4068840000  
2    4073160000  
3    3910550000  
4    3871420000  


KeyError: "['spx_close'] not in index"