# 05 - ML Modeling and Prediction
This notebook adds a machine learning model to predict intraday continuation based on premarket indicators.

In [8]:
# 1. Load packages
import pandas as pd
import yfinance as yf
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# 2. Load features
df = pd.read_csv("../data/feature_output.csv")
df["date"] = pd.to_datetime(df["timestamp"]).dt.date

# 3. Get unique dates and tickers
unique_dates = df["date"].unique()
tickers = df["ticker"].unique().tolist()

# 4. Fetch close prices using yfinance (safe method)
close_prices = []

for ticker in tickers:
    ticker_data = yf.download(
        ticker,
        start=str(df["date"].min()),
        end=str(df["date"].max() + pd.Timedelta(days=1)),
        progress=False,
        auto_adjust=True
    )

    if not ticker_data.empty:
        df_temp = ticker_data.reset_index()
        df_temp["ticker"] = ticker
        df_temp["date"] = pd.to_datetime(df_temp["Date"]).dt.date
        df_temp = df_temp[["ticker", "date", "Close"]].rename(columns={"Close": "close"})
        close_prices.append(df_temp)

# Combine all ticker close prices
close_df = pd.concat(close_prices, ignore_index=True)

# 5. Merge back into main DataFrame using both 'ticker' and 'date'
df = df.merge(close_df, on=["ticker", "date"], how="left")

# 6. Drop missing close values
df = df.dropna(subset=["close", "prev_close"])

# 7. Ensure both are numeric (not object dtype or Series)
df["close"] = pd.to_numeric(df["close"], errors="coerce")
df["prev_close"] = pd.to_numeric(df["prev_close"], errors="coerce")

# 8. Calculate close % change
df["close_pct_change"] = ((df["close"] - df["prev_close"]) / df["prev_close"]) * 100

# 9. Create target column
df["target"] = (df["close_pct_change"] > 0).astype(int)

# 10. Save and preview
df.to_csv("../data/feature_output_with_target.csv", index=False)
df[["ticker", "prev_close", "close", "close_pct_change", "target"]].head()

MergeError: Not allowed to merge between different levels. (1 levels on the left, 2 on the right)

We'll define a simple target: whether the stock closed green or red **after the premarket move**.

In [2]:
# Example synthetic target
df["target"] = (df["close_pct_change"] > 0).astype(int)

KeyError: 'close_pct_change'