In [12]:
# ✅ Imports
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os

# ✅ Load dataset
df = pd.read_csv("processed_data.csv", parse_dates=["Date"])

# ✅ Filter for 6 BigTech companies
tickers = ["AAPL", "GOOG", "MSFT", "META", "NVDA", "AMZN"]
df = df[df["Ticker"].isin(tickers)].copy()
# Sort by ticker, then date, so that every per-ticker shift / rolling window
# below walks the rows in chronological order.
df = df.sort_values(by=["Ticker", "Date"]).reset_index(drop=True)

# ✅ Feature Engineering (Price-Based Only)
# Build the per-ticker groupers once and reuse them; grouping keeps every
# lag / return / rolling statistic from leaking across ticker boundaries.
close_by_ticker = df.groupby("Ticker")["Close"]
volume_by_ticker = df.groupby("Ticker")["Volume"]

df["Return_1d"] = close_by_ticker.pct_change(1)
df["Return_3d"] = close_by_ticker.pct_change(3)
df["MA_5"] = close_by_ticker.transform(lambda x: x.rolling(window=5).mean())
df["Volatility_5d"] = close_by_ticker.transform(lambda x: x.rolling(window=5).std())
df["Lag_1"] = close_by_ticker.shift(1)
df["Lag_2"] = close_by_ticker.shift(2)
df["Lag_3"] = close_by_ticker.shift(3)
df["Volume_Change"] = volume_by_ticker.pct_change(1)
df["Return_5d"] = close_by_ticker.pct_change(5)
# Target: the NEXT row's close for the same ticker (shift(-1)).
df["Target_Close"] = close_by_ticker.shift(-1)

# pct_change divides by the earlier value, so a zero denominator (e.g. a
# zero-volume day) yields +/-inf. dropna() does not remove inf, and
# StandardScaler refuses it, so map inf to NaN in the ratio columns first.
ratio_columns = ["Return_1d", "Return_3d", "Return_5d", "Volume_Change"]
df[ratio_columns] = df[ratio_columns].replace([np.inf, -np.inf], np.nan)

# ✅ Drop rows with any missing values
# This removes the warm-up rows at the start of each ticker (lags / rolling
# windows) and the last row of each ticker (no next-day target).
df = df.dropna().reset_index(drop=True)

# ✅ Initialize result storage
all_predictions = []
features = ["Return_1d", "Return_3d", "MA_5", "Volatility_5d", "Lag_1", "Lag_2", "Lag_3", "Volume_Change", "Return_5d"]

# ✅ Modeling loop
# One independent model per ticker: chronological 80/20 split, scaler fitted
# on the training part only, small XGBoost regressor, metrics on the hold-out.
for ticker in tickers:
    df_ticker = df[df["Ticker"] == ticker].copy()

    # Time-series-aware split (no shuffling: the last 20% of rows is the test set)
    split_index = int(len(df_ticker) * 0.8)
    if split_index == 0:
        # Ticker missing from the data (or only one usable row): nothing to
        # train on. Skip it instead of failing inside the scaler.
        print(f"⚠️ {ticker}: not enough rows to train a model, skipping.")
        continue

    X = df_ticker[features]
    y = df_ticker["Target_Close"]
    X_train = X.iloc[:split_index]
    X_test = X.iloc[split_index:]
    y_train = y.iloc[:split_index]
    y_test = y.iloc[split_index:]

    # Scaling: statistics come from the training rows only, so nothing from
    # the test period leaks into the fit.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model (simpler to avoid overfitting)
    # NOTE(review): the target is a raw price level; tree ensembles cannot
    # predict outside the range of targets seen in training, which may explain
    # weak hold-out R^2 on strongly trending tickers. Consider a return target.
    model = xgb.XGBRegressor(n_estimators=30, max_depth=3, learning_rate=0.1, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Evaluation
    # RMSE is computed as sqrt(MSE): the `squared=False` keyword was
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"🌟 {ticker} Model Performance:")
    print(f"   R^2   : {r2:.4f}")
    print(f"   RMSE  : {rmse:.2f}")
    print(f"   MAE   : {mae:.2f}")
    print("-" * 30)

    # Collect predictions
    # NOTE: "Date" and "Close" describe the feature row's day, while
    # "Predicted_Close" is the forecast for the following row's close
    # (the target is Close shifted by -1 within the ticker).
    df_pred = df_ticker.loc[X_test.index, ["Date", "Ticker", "Close"]].copy()
    df_pred["Predicted_Close"] = y_pred
    all_predictions.append(df_pred)

# ✅ Export final prediction file
if not all_predictions:
    # pd.concat([]) raises an unhelpful "No objects to concatenate".
    raise ValueError("No predictions were produced: no ticker had enough rows to train on.")
final_df = pd.concat(all_predictions).sort_values(by=["Ticker", "Date"])
final_df.to_csv("ml_predictions.csv", index=False)
print("\n✅ Predictions saved to: ml_predictions.csv")

🌟 AAPL Model Performance:
   R^2   : -0.1416
   RMSE  : 10.98
   MAE   : 8.76
------------------------------
🌟 GOOG Model Performance:
   R^2   : 0.9749
   RMSE  : 2.05
   MAE   : 1.66
------------------------------
🌟 MSFT Model Performance:
   R^2   : 0.1757
   RMSE  : 34.21
   MAE   : 22.43
------------------------------
🌟 META Model Performance:
   R^2   : 0.7487
   RMSE  : 36.41
   MAE   : 15.07
------------------------------
🌟 NVDA Model Performance:
   R^2   : -1.0855
   RMSE  : 19.62
   MAE   : 15.11
------------------------------
🌟 AMZN Model Performance:
   R^2   : 0.9963
   RMSE  : 1.24
   MAE   : 0.87
------------------------------

✅ Predictions saved to: ml_predictions.csv


