# LSTM + GRU Hybrid Model for Stock Prediction

## Imports and Config

In [1]:
#cell 1: imports + seeds + device
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    The same ``seed`` is applied, in order, to Python's ``random`` module,
    NumPy's global generator, PyTorch's default generator and all CUDA
    generators.

    Args:
        seed: Integer seed shared by all generators (default 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

# Seed all generators once, before anything stochastic runs.
set_seed(42)

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:", device)


device: cuda


## Loading the Data

In [2]:
#cell 2: project paths + load panel + filter AAPL
# The project root is the nearest directory (cwd first, then its ancestors) that holds a "data" folder.
cwd = Path.cwd().resolve()
project_root = next(
    (folder for folder in (cwd, *cwd.parents) if (folder / "data").exists()),
    None,
)
if project_root is None:
    raise RuntimeError("Project root not found: missing /data folder")

DATA_DIR = project_root / "data"
PROC_DIR = DATA_DIR / "processed"
panel_path = PROC_DIR / "modeling_panel_targets.parquet"

print("project_root:", project_root)
print("panel_path:", panel_path)

# Read the whole panel, parse dates, then keep the AAPL rows in chronological order.
df = pd.read_parquet(panel_path)
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values(["ticker", "date"]).reset_index(drop=True)

df = df.loc[df["ticker"] == "AAPL"].copy()
df = df.sort_values("date").reset_index(drop=True)

# --- 20-day forward log return target (create if missing) ---
HORIZON = 20
target_20 = f"target_logret_{HORIZON}d"
if "Adj Close" in df.columns:
    price_col = "Adj Close"
else:
    price_col = "Close"

if target_20 not in df.columns:
    # log(price HORIZON rows ahead / current price)
    df[target_20] = np.log(df[price_col].shift(-HORIZON).div(df[price_col]))

print("Using target column:", target_20, "| price_col:", price_col)

# Collapse the duplicated "_logret_lag" suffix present in some column names.
df.columns = df.columns.str.replace("_logret_laglogret_lag", "_logret_lag", regex=False)

# Remove every column prefixed with one of the other tickers.
cols_to_drop = list(
    filter(lambda name: name.startswith(("MSFT_", "GOOG_", "AMZN_")), df.columns)
)
df = df.drop(columns=cols_to_drop)

print("AAPL df shape:", df.shape)
display(df.head(3))


project_root: /media/workhorse/Windows/MyWorkPlace/Projects/StockPredictor
panel_path: /media/workhorse/Windows/MyWorkPlace/Projects/StockPredictor/data/processed/modeling_panel_targets.parquet
Using target column: target_logret_20d | price_col: Adj Close
AAPL df shape: (3285, 287)


Unnamed: 0,date,ticker,Open,High,Low,Close,Adj Close,Volume,adj_close,logret_1d,...,AAPL_logret_lag20,CL=F_logret_lag20,GC=F_logret_lag20,QQQ_logret_lag20,SPY_logret_lag20,UUP_logret_lag20,XLK_logret_lag20,^TNX_logret_lag20,^VIX_logret_lag20,target_logret_20d
0,2013-01-02,AAPL,19.779285,19.821428,19.343929,19.608213,16.612209,560518000.0,16.612209,,...,,,,,,,,,,-0.186779
1,2013-01-03,AAPL,19.567142,19.631071,19.321428,19.360714,16.402523,352965200.0,16.402523,-0.012703,...,,,,,,,,,,-0.17819
2,2013-01-04,AAPL,19.1775,19.236786,18.779642,18.821428,15.945646,594333600.0,15.945646,-0.028249,...,,,,,,,,,,-0.175167


In [3]:
# Show the full list of column names in the AAPL frame.
print("Columns in the dataframe:", list(df.columns))

Columns in the dataframe: ['date', 'ticker', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'adj_close', 'logret_1d', 'ret_1d', 'target_logret_1d', 'hl_range', 'oc_change', 'ret_mean_5', 'ret_vol_5', 'ret_mean_10', 'ret_vol_10', 'ret_mean_20', 'ret_vol_20', 'ret_mean_60', 'ret_vol_60', 'sma_5', 'sma_10', 'sma_20', 'sma_60', 'sma_spread_5', 'sma_spread_20', 'sma_spread_60', 'ema_12', 'ema_26', 'macd', 'macd_signal', 'macd_hist', 'rsi_14', 'bb_mid_20', 'bb_std_20', 'bb_upper_20', 'bb_lower_20', 'bb_width_20', 'vol_change', 'vol_z_20', 'obv', 'dow', 'month', 'is_month_end', 'logret_1d_lag1', 'logret_1d_lag2', 'logret_1d_lag3', 'logret_1d_lag4', 'logret_1d_lag5', 'logret_1d_lag6', 'logret_1d_lag7', 'logret_1d_lag8', 'logret_1d_lag9', 'logret_1d_lag10', 'logret_1d_lag11', 'logret_1d_lag12', 'logret_1d_lag13', 'logret_1d_lag14', 'logret_1d_lag15', 'logret_1d_lag16', 'logret_1d_lag17', 'logret_1d_lag18', 'logret_1d_lag19', 'logret_1d_lag20', 'logret_1d_lag21', 'logret_1d_lag22', 'logr

## Data Split

In [4]:
#cell 3: define X/y and time split (drop ALL other target_* columns to avoid leakage)
# Reuse the target name built in cell 2 so the horizon is configured in exactly one place.
TARGET = target_20

num_cols = df.select_dtypes(include=[np.number]).columns.tolist()

# Drop any other target-like columns (future info leakage)
leak_cols = [c for c in num_cols if c.startswith("target_") and c != TARGET]
drop_cols = {"date", "ticker", TARGET}.union(leak_cols)

feature_cols = [c for c in num_cols if c not in drop_cols]

print("Dropped potential leakage columns:", len(leak_cols))
if len(leak_cols) > 0:
    print("Examples:", leak_cols[:10])

# Keep only rows where the target and every feature are present.
model_df = df[["date", TARGET] + feature_cols].dropna().reset_index(drop=True)

n = len(model_df)
if n == 0:
    # dropna() across all feature columns removes every row if any column is entirely NaN.
    raise ValueError("model_df is empty after dropna(); check for all-NaN feature columns")
split_idx = int(n * 0.80)

# Chronological 80/20 split: rows are taken in their existing (date-sorted) order, no shuffling.
# NOTE(review): the target looks HORIZON rows ahead, so the last HORIZON training labels are
# presumably computed from prices that fall inside the test window. Consider purging those
# rows (or adding an embargo gap) before trusting test metrics — confirm with the data owner.
train_df = model_df.iloc[:split_idx].copy()
test_df  = model_df.iloc[split_idx:].copy()

X_train = train_df[feature_cols].copy()
y_train = train_df[TARGET].copy()

X_test  = test_df[feature_cols].copy()
y_test  = test_df[TARGET].copy()

print("model_df shape:", model_df.shape)
print("train rows:", len(train_df), "test rows:", len(test_df))
print("train date range:", train_df["date"].min(), "to", train_df["date"].max())
print("test  date range:", test_df["date"].min(), "to", test_df["date"].max())


Dropped potential leakage columns: 1
Examples: ['target_logret_1d']
model_df shape: (3078, 285)
train rows: 2462 test rows: 616
train date range: 2013-04-02 00:00:00 to 2023-05-10 00:00:00
test  date range: 2023-05-11 00:00:00 to 2025-12-23 00:00:00


In [5]:
# Sanity check: the feature list must not contain any target_* column.
print(
    "Any target_* columns still in features?",
    bool([name for name in feature_cols if name.startswith("target_")]),
)

Any target_* columns still in features? False


## Scaling and PCA

In [6]:
#cell 4: StandardScaler + PCA(80% variance)
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Standardize using statistics learned from the training rows only, then apply to both sets.
scaler = StandardScaler()
scaler.fit(X_train.values)
X_train_s = scaler.transform(X_train.values)
X_test_s = scaler.transform(X_test.values)

# PCA keeps as many components as needed to explain 80% of the training variance.
pca = PCA(n_components=0.80, svd_solver="full")
X_train_p = pca.fit_transform(X_train_s)
X_test_p = pca.transform(X_test_s)

pca_columns = [f"pca_{k}" for k in range(1, pca.n_components_ + 1)]

print("PCA components for 80% variance:", pca.n_components_)
print("Explained variance ratio sum:", float(pca.explained_variance_ratio_.sum()))
print("X_train_p:", X_train_p.shape, "X_test_p:", X_test_p.shape)


PCA components for 80% variance: 91
Explained variance ratio sum: 0.8024344539261916
X_train_p: (2462, 91) X_test_p: (616, 91)


## Sliding Window Sequences

In [7]:
#cell 5: build sliding-window sequences (lookback=60) with test continuity
LOOKBACK = 60

def make_sequences(X_2d, y_1d, lookback):
    """Turn a row-per-timestep matrix into sliding windows paired with targets.

    For every index ``t`` in ``range(lookback, len(X_2d))`` the window is
    ``X_2d[t - lookback:t]`` (it ends one row before ``t``) and the target is
    ``y_1d[t]``.

    Args:
        X_2d: Array-like of features, one row per timestep.
        y_1d: Array-like of targets aligned row-for-row with ``X_2d``.
        lookback: Number of consecutive rows in each window.

    Returns:
        ``(Xs, ys)`` as float32 arrays. ``Xs`` has shape
        ``(n_windows, lookback, *X_2d.shape[1:])`` and ``ys`` has shape
        ``(n_windows,)``. When there are not enough rows for a single window
        the arrays are empty but keep that dimensionality.

    Raises:
        ValueError: If ``lookback`` is negative, or if ``y_1d`` has fewer
            entries than ``X_2d`` has rows.
    """
    if lookback < 0:
        # A negative lookback would silently build meaningless slices.
        raise ValueError(f"lookback must be non-negative, got {lookback}")

    features = np.asarray(X_2d)
    targets = np.asarray(y_1d)
    n_rows = len(features)

    if n_rows <= lookback:
        # No complete window: return correctly shaped empties so that
        # downstream code reading .shape[-1] still sees the feature count.
        empty_X = np.empty((0, lookback) + features.shape[1:], dtype=np.float32)
        return empty_X, np.empty((0,), dtype=np.float32)

    if len(targets) < n_rows:
        raise ValueError(
            f"y_1d has {len(targets)} entries but X_2d has {n_rows} rows"
        )

    Xs = np.stack([features[t - lookback:t] for t in range(lookback, n_rows)])
    ys = targets[lookback:n_rows]
    return Xs.astype(np.float32), ys.astype(np.float32)

# Training windows come straight from the PCA-projected training rows.
Xtr_seq, ytr_seq = make_sequences(X_train_p, y_train.values, LOOKBACK)

#test continuity: prepend last LOOKBACK rows of train to test features, then keep test targets only
X_test_full = np.concatenate([X_train_p[-LOOKBACK:], X_test_p], axis=0)
y_test_full = np.concatenate([y_train.values[-LOOKBACK:], y_test.values])

Xte_seq_all, yte_seq_all = make_sequences(X_test_full, y_test_full, LOOKBACK)

# Windows start at index LOOKBACK of the padded arrays, so every target already
# belongs to the real test period and nothing has to be trimmed.
Xte_seq, yte_seq = Xte_seq_all, yte_seq_all

print("train sequences:", Xtr_seq.shape, ytr_seq.shape)
print("test  sequences:", Xte_seq.shape, yte_seq.shape)


train sequences: (2402, 60, 91) (2402,)
test  sequences: (616, 60, 91) (616,)


## Temporal Train/Val Split

In [8]:
#cell 6: temporal train/val split on sequences (train-only)
# The first 90% of the training windows (in order) fit the model; the remaining 10% validate it.
n_train_seq = Xtr_seq.shape[0]
val_start = int(n_train_seq * 0.90)

Xtr2, Xva = Xtr_seq[:val_start], Xtr_seq[val_start:]
ytr2, yva = ytr_seq[:val_start], ytr_seq[val_start:]

print("train2:", Xtr2.shape, ytr2.shape)
print("val   :", Xva.shape, yva.shape)


train2: (2161, 60, 91) (2161,)
val   : (241, 60, 91) (241,)


In [9]:
# DIAG: exact target-date ranges for sequences and splits
train_target_dates = train_df["date"].values[LOOKBACK:]   # one per ytr_seq
n_target_dates = len(train_target_dates)
val_start = int(n_target_dates * 0.90)

print("Train target dates total:", n_target_dates, "== ytr_seq:", len(ytr_seq))
print("Train2 targets:", val_start, "| Val targets:", n_target_dates - val_start)

# One (label, first date, last date) triple per split, printed in a uniform format.
for label, first, last in (
    ("TRAIN2 date range:", train_target_dates[0], train_target_dates[val_start-1]),
    ("VAL date range:", train_target_dates[val_start], train_target_dates[-1]),
    ("TEST date range:", test_df["date"].values[0], test_df["date"].values[-1]),
):
    print(label, pd.to_datetime(first), "to", pd.to_datetime(last))


Train target dates total: 2402 == ytr_seq: 2402
Train2 targets: 2161 | Val targets: 241
TRAIN2 date range: 2013-06-26 00:00:00 to 2022-05-24 00:00:00
VAL date range: 2022-05-25 00:00:00 to 2023-05-10 00:00:00
TEST date range: 2023-05-11 00:00:00 to 2025-12-23 00:00:00


## Defining the LSTM + GRU Model

In [10]:
#cell 7: torch datasets + loaders
class SeqDataset(Dataset):
    """Dataset that serves ``(window, target)`` pairs from two NumPy arrays.

    Both arrays are wrapped with ``torch.from_numpy`` and indexed along their
    first axis, so item ``i`` is ``(X[i], y[i])``.
    """

    def __init__(self, X, y):
        self.X, self.y = (torch.from_numpy(arr) for arr in (X, y))

    def __len__(self):
        # Number of samples = size of the leading axis of X.
        return self.X.size(0)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

def make_loader(X, y, batch_size, shuffle):
    """Wrap ``(X, y)`` in a ``SeqDataset`` and return a ``DataLoader`` over it.

    The final, possibly smaller, batch is kept (``drop_last=False``).
    """
    return DataLoader(
        SeqDataset(X, y),
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=False,
    )

print("ready")


ready


In [11]:
#cell 8: LSTM -> GRU -> Dense -> Dense -> Output model
class LSTMGRU(nn.Module):
    """Sequence regressor: LSTM -> GRU -> two ReLU dense layers -> scalar output.

    Args:
        n_features: Size of the feature vector at each timestep.
        hidden_lstm: Hidden size of the LSTM layer.
        hidden_gru: Hidden size of the GRU layer.
        dense1: Width of the first dense layer.
        dense2: Width of the second dense layer.
        dropout: Dropout probability applied after each dense activation.
    """

    def __init__(self, n_features, hidden_lstm=128, hidden_gru=128, dense1=64, dense2=32, dropout=0.0):
        super().__init__()
        # Recurrent stack; both layers take batch-first tensors.
        self.lstm = nn.LSTM(n_features, hidden_lstm, batch_first=True)
        self.gru = nn.GRU(hidden_lstm, hidden_gru, batch_first=True)
        # Dense head ending in a single regression output.
        self.fc1 = nn.Linear(hidden_gru, dense1)
        self.fc2 = nn.Linear(dense1, dense2)
        self.out = nn.Linear(dense2, 1)
        self.act = nn.ReLU()
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        """Map a batch-first sequence tensor to one prediction per sample."""
        lstm_seq, _ = self.lstm(x)
        gru_seq, _ = self.gru(lstm_seq)
        # Only the GRU output at the last timestep feeds the dense head.
        hidden = gru_seq[:, -1, :]
        for dense in (self.fc1, self.fc2):
            hidden = self.drop(self.act(dense(hidden)))
        return self.out(hidden).squeeze(-1)

print("ready")


ready


In [None]:
#cell 9: training utilities

from sklearn.metrics import r2_score

def rmse_np(y_true, y_pred):
    """Root-mean-squared error between two array-likes, as a Python float."""
    residual = np.asarray(y_true) - np.asarray(y_pred)
    mean_sq = np.mean(np.square(residual))
    return float(np.sqrt(mean_sq))

def r2_np(y_true, y_pred):
    """R2 score from ``r2_score`` for two array-likes, as a Python float."""
    truth, estimate = np.asarray(y_true), np.asarray(y_pred)
    score = r2_score(truth, estimate)
    return float(score)


@torch.no_grad()
def predict_loader(model, loader):
    """Run ``model`` over every batch of ``loader`` and collect targets and predictions.

    The model is switched to eval mode and left in that mode. Inputs are moved
    to the device of the model's first parameter (a parameter-free model gets
    the batch unchanged). Targets are never fed to the model, so they are not
    transferred to the accelerator.

    Args:
        model: A ``torch.nn.Module`` taking one input batch and returning a tensor.
        loader: Iterable yielding ``(inputs, targets)`` tensor pairs.

    Returns:
        ``(y_true, y_pred)``: NumPy arrays built by concatenating the per-batch
        targets and predictions along the first axis, in loader order.

    Raises:
        ValueError: From ``np.concatenate`` if the loader yields no batches.
    """
    model.eval()

    # Follow the model's own device rather than a module-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    preds, ys = [], []
    for xb, yb in loader:
        if model_device is not None:
            xb = xb.to(model_device)
        preds.append(model(xb).detach().cpu().numpy())
        # Avoid a pointless host -> device -> host round trip for the targets.
        ys.append(yb.detach().cpu().numpy())
    return np.concatenate(ys), np.concatenate(preds)

def train_one_trial(model, train_loader, val_loader, lr, weight_decay, grad_clip, max_epochs, patience):
    """Train ``model`` with AdamW on MSE loss, early-stopping on validation R2.

    After every epoch the validation R2 is computed. The weights of the best
    epoch are snapshotted on the CPU and restored into ``model`` before
    returning. Training stops early once ``patience`` consecutive epochs fail
    to improve the best R2.

    Args:
        model: Module to train in place (expected to already be on ``device``).
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        grad_clip: Maximum gradient norm passed to ``clip_grad_norm_``.
        max_epochs: Upper bound on the number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        Best validation R2 as a float; ``-inf`` if no epoch produced a score
        that beat the initial sentinel (for example when ``max_epochs <= 0``).
    """
    crit = nn.MSELoss()
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val_r2 = -float("inf")
    best_state = None
    no_improve = 0

    for epoch in range(max_epochs):
        # predict_loader() leaves the model in eval mode, so re-enable training every epoch.
        model.train()
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            opt.zero_grad(set_to_none=True)
            pred = model(xb)
            loss = crit(pred, yb)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=grad_clip)
            opt.step()

        yv_true, yv_pred = predict_loader(model, val_loader)
        val_r2 = r2_np(yv_true, yv_pred)

        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            # Snapshot on the CPU so the checkpoint does not hold GPU memory.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                break

    # best_state is still None when no epoch ran or no epoch beat the sentinel;
    # keep the current weights instead of crashing in load_state_dict(None).
    if best_state is not None:
        model.load_state_dict(best_state)
    return float(best_val_r2)




## Training the Model

In [18]:
#cell 10: Grid Search (single validation split + resume + append-to-CSV per run)
import itertools
import time
import json

N_FEATURES = Xtr2.shape[-1]

# ---- Grid (edit ONLY these lists to widen/narrow the grid) ----
GRID = dict(
    hidden_lstm=[64, 128, 256],
    hidden_gru=[64, 128, 256],
    dense1=[32, 64, 128],
    dense2=[16, 32, 64],
    dropout=[0.0, 0.2],
    lr=[5e-4, 1e-3],
    weight_decay=[1e-6, 1e-4],
    batch_size=[64, 128],
    grad_clip=[1.0],
)

MAX_EPOCHS, PATIENCE, BASE_SEED = 50, 7, 42

runs_path = PROC_DIR / "phase2_gridsearch_runs.csv"

# Cartesian product of all value lists, one dict per combination, in GRID key order.
param_keys = list(GRID)
all_params = [
    dict(zip(param_keys, combo))
    for combo in itertools.product(*GRID.values())
]
print("total grid combinations:", len(all_params))
print("runs_path:", runs_path)

def config_to_id(p):
    """Serialize a hyper-parameter dict to a JSON string with sorted keys.

    Sorting the keys makes the id independent of dict insertion order, so the
    same configuration maps to the same string across sessions (used for
    resume/skip).
    """
    canonical = json.dumps(p, sort_keys=True)
    return canonical

def append_row_csv(path, row, columns):
    """Append one record to the CSV at ``path``.

    The header line is written only when the file does not exist yet; columns
    are emitted in the order given by ``columns``.
    """
    write_header = not path.exists()
    pd.DataFrame([row], columns=columns).to_csv(
        path, mode="a", header=write_header, index=False
    )

# Resume support: skip already completed configs (by config_id)
# NOTE(review): every row of the existing CSV counts as "done", including rows whose
# status is an error, so failed configs are not retried on resume — confirm this is intended.
done_ids = set()
next_run_id = 1
if runs_path.exists():
    prev = pd.read_csv(runs_path)
    if len(prev) > 0:
        if "config_id" in prev.columns:
            done_ids = set(prev["config_id"].astype(str).tolist())
        # Continue numbering after the highest run_id already logged.
        next_run_id = int(prev["run_id"].max()) + 1

# Column order of the run log: fixed bookkeeping columns, then one column per hyper-parameter.
columns = [
    #"run_id", "seed", "status", "val_rmse", "train_seconds", "config_id", "error_msg",
    "run_id", "seed", "status", "val_r2", "train_seconds", "config_id", "error_msg",

    *param_keys
]

#best_val = float("inf")
# Selection metric is validation R2 (higher is better), so start from -inf.
best_val = -float("inf")

best_params = None
best_run_id = None

print("starting grid search...")
for p in all_params:
    cfg_id = config_to_id(p)
    if cfg_id in done_ids:
        continue

    run_id = next_run_id
    next_run_id += 1

    # per-run seeding -> reproducible per configuration
    # The seed is derived from run_id and stored in the CSV so the run can be repeated later.
    seed = BASE_SEED + run_id
    set_seed(seed)

    # Loaders are rebuilt per run because batch_size is part of the grid.
    train_loader = make_loader(Xtr2, ytr2, batch_size=int(p["batch_size"]), shuffle=True)
    val_loader   = make_loader(Xva,  yva,  batch_size=int(p["batch_size"]), shuffle=False)

    model = LSTMGRU(
        n_features=N_FEATURES,
        hidden_lstm=int(p["hidden_lstm"]),
        hidden_gru=int(p["hidden_gru"]),
        dense1=int(p["dense1"]),
        dense2=int(p["dense2"]),
        dropout=float(p["dropout"]),
    ).to(device)

    t0 = time.time()
    status = "ok"
    err_msg = ""

    # Any failure in a single configuration is recorded in the CSV instead of aborting the grid.
    try:
        #val_rmse = train_one_trial(
        val_r2 = train_one_trial(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            lr=float(p["lr"]),
            weight_decay=float(p["weight_decay"]),
            grad_clip=float(p["grad_clip"]),
            max_epochs=MAX_EPOCHS,
            patience=PATIENCE
        )

    except Exception as e:
        val_r2 = np.nan
        status = f"error:{type(e).__name__}"
        # Truncate long messages so a single row stays manageable.
        err_msg = str(e)[:1500]

    train_seconds = float(time.time() - t0)

    row = {
        "run_id": run_id,
        "seed": seed,
        "status": status,
        #"val_rmse": float(val_rmse) if np.isfinite(val_rmse) else np.nan,
        # Non-finite scores (NaN / +-inf) are stored as NaN.
        "val_r2": float(val_r2) if np.isfinite(val_r2) else np.nan,
        "train_seconds": train_seconds,
        "config_id": cfg_id,
        "error_msg": err_msg,
        **p
    }

    # SAVE IMMEDIATELY after each run
    append_row_csv(runs_path, row, columns)

    #print(f"[run {run_id}] status={status} val_rmse={row['val_rmse']:.6f} params={p}")
    print(f"[run {run_id}] status={status} val_r2={row['val_r2']:.6f} params={p}")


    #if status == "ok" and np.isfinite(val_rmse) and val_rmse < best_val:
    # Track the best configuration seen in THIS session only (the CSV report below covers all sessions).
    if status == "ok" and np.isfinite(val_r2) and val_r2 > best_val:
        #best_val = float(val_rmse)
        best_val = float(val_r2)
        best_params = dict(p)
        best_run_id = run_id

    # cleanup between runs (helps GPU memory)
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Report best-so-far (also works if you stopped and resumed later)
# The values below overwrite best_run_id / best_val / best_params with the best row of the full CSV.
if runs_path.exists():
    runs = pd.read_csv(runs_path)
    runs_ok = runs[runs["status"].astype(str) == "ok"].copy()
    #runs_ok = runs_ok.dropna(subset=["val_rmse"])
    #runs_ok = runs_ok.sort_values("val_rmse", ascending=True)
    runs_ok = runs_ok.dropna(subset=["val_r2"])
    runs_ok = runs_ok.sort_values("val_r2", ascending=False)

    if len(runs_ok) > 0:
        top = runs_ok.iloc[0]
        best_run_id = int(top["run_id"])
        #best_val = float(top["val_rmse"])
        best_val = float(top["val_r2"])
        best_params = {k: top[k] for k in param_keys}

        # cast types cleanly
        # (values read back from the CSV row are converted to plain int / float)
        for k in ["hidden_lstm","hidden_gru","dense1","dense2","batch_size"]:
            best_params[k] = int(best_params[k])
        for k in ["dropout","lr","weight_decay","grad_clip"]:
            best_params[k] = float(best_params[k])

        print("\nBEST (from CSV):")
        print("best_run_id:", best_run_id)
        #print("best val_rmse:", best_val)
        print("best val_r2:", best_val)

        print("best params:", best_params)
    else:
        print("\nNo successful runs yet (check errors in the CSV).")


total grid combinations: 1296
runs_path: /media/workhorse/Windows/MyWorkPlace/Projects/StockPredictor/data/processed/phase2_gridsearch_runs.csv
starting grid search...
[run 1] status=ok val_r2=0.002711 params={'hidden_lstm': 64, 'hidden_gru': 64, 'dense1': 32, 'dense2': 16, 'dropout': 0.0, 'lr': 0.0005, 'weight_decay': 1e-06, 'batch_size': 64, 'grad_clip': 1.0}
[run 2] status=ok val_r2=-0.055607 params={'hidden_lstm': 64, 'hidden_gru': 64, 'dense1': 32, 'dense2': 16, 'dropout': 0.0, 'lr': 0.0005, 'weight_decay': 1e-06, 'batch_size': 128, 'grad_clip': 1.0}
[run 3] status=ok val_r2=0.003293 params={'hidden_lstm': 64, 'hidden_gru': 64, 'dense1': 32, 'dense2': 16, 'dropout': 0.0, 'lr': 0.0005, 'weight_decay': 0.0001, 'batch_size': 64, 'grad_clip': 1.0}
[run 4] status=ok val_r2=-0.048926 params={'hidden_lstm': 64, 'hidden_gru': 64, 'dense1': 32, 'dense2': 16, 'dropout': 0.0, 'lr': 0.0005, 'weight_decay': 0.0001, 'batch_size': 128, 'grad_clip': 1.0}
[run 5] status=ok val_r2=-0.126184 params=

In [20]:
#cell 11: retrain TOP 10 configs from CSV and print results (no saving)
# Each selected configuration is retrained with the seed logged in the CSV, on the same
# train/val split as the grid search; the best retrained weights become `final_model`.
grid_path = PROC_DIR / "phase2_gridsearch_runs.csv"
runs = pd.read_csv(grid_path)

# Keep successful runs with a recorded score, best validation R2 first.
runs_ok = runs[runs["status"].astype(str) == "ok"].copy()
runs_ok = runs_ok.dropna(subset=["val_r2"]).sort_values("val_r2", ascending=False)

TOPK = 10
top_runs = runs_ok.head(TOPK).reset_index(drop=True)

if len(top_runs) == 0:
    raise RuntimeError("No successful grid runs found. Check phase2_gridsearch_runs.csv error_msg column.")

print("Top runs loaded:", len(top_runs))
display(top_runs[["run_id","seed","val_r2","hidden_lstm","hidden_gru","dense1","dense2","dropout","lr","weight_decay","batch_size","grad_clip"]])

# Use SAME split as grid search (this matches your Cell 6 logic)
# NOTE(review): batch_size_default is not referenced again in this cell — possibly leftover.
batch_size_default = int(top_runs.loc[0, "batch_size"])

n_train_seq = len(Xtr_seq)
val_start = int(n_train_seq * 0.90)
XtrF, ytrF = Xtr_seq[:val_start], ytr_seq[:val_start]
XvaF, yvaF = Xtr_seq[val_start:], ytr_seq[val_start:]

# We'll recreate loaders per run because batch_size changes
MAX_EPOCHS = 50
PATIENCE = 7

results = []
best_val_r2 = -float("inf")
best_state = None
best_run_id = None
best_params = None

for i in range(len(top_runs)):
    r = top_runs.iloc[i]

    # Values read from the CSV row are converted to plain int / float before use.
    params = {
        "hidden_lstm":  int(r["hidden_lstm"]),
        "hidden_gru":   int(r["hidden_gru"]),
        "dense1":       int(r["dense1"]),
        "dense2":       int(r["dense2"]),
        "dropout":      float(r["dropout"]),
        "lr":           float(r["lr"]),
        "weight_decay": float(r["weight_decay"]),
        "batch_size":   int(r["batch_size"]),
        "grad_clip":    float(r["grad_clip"]),
    }

    run_id = int(r["run_id"])
    seed = int(r["seed"])
    csv_val_r2 = float(r["val_r2"])

    # Re-seed with the logged seed BEFORE building loaders and model, mirroring the grid-search order.
    set_seed(seed)

    train_loaderF = make_loader(XtrF, ytrF, batch_size=params["batch_size"], shuffle=True)
    val_loaderF   = make_loader(XvaF, yvaF, batch_size=params["batch_size"], shuffle=False)

    model = LSTMGRU(
        n_features=Xtr_seq.shape[-1],
        hidden_lstm=params["hidden_lstm"],
        hidden_gru=params["hidden_gru"],
        dense1=params["dense1"],
        dense2=params["dense2"],
        dropout=params["dropout"],
    ).to(device)

    # train_one_trial restores the best-epoch weights into `model` before returning.
    best_r2_train = train_one_trial(
        model=model,
        train_loader=train_loaderF,
        val_loader=val_loaderF,
        lr=params["lr"],
        weight_decay=params["weight_decay"],
        grad_clip=params["grad_clip"],
        max_epochs=MAX_EPOCHS,
        patience=PATIENCE
    )

    # compute RMSE + R2 on val at the loaded-best state
    yva_true, yva_pred = predict_loader(model, val_loaderF)
    val_r2_retrain = r2_np(yva_true, yva_pred)
    val_rmse_retrain = rmse_np(yva_true, yva_pred)

    out = {
        "rank": i+1,
        "run_id": run_id,
        "seed": seed,
        "csv_val_r2": csv_val_r2,
        "retrain_val_r2": float(val_r2_retrain),
        "retrain_val_rmse": float(val_rmse_retrain),
        **params
    }
    results.append(out)

    # NOTE(review): the denominator is TOPK, not len(top_runs); it over-counts if fewer runs were loaded.
    print(f"\n[{i+1}/{TOPK}] run_id={run_id} seed={seed}")
    print("params:", params)
    print(f"csv val_r2={csv_val_r2:.6f} | retrain val_r2={val_r2_retrain:.6f} | retrain val_rmse={val_rmse_retrain:.6f}")

    # keep best retrained model
    if val_r2_retrain > best_val_r2:
        best_val_r2 = float(val_r2_retrain)
        # CPU copy of the weights, so the model itself can be deleted below.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        best_run_id = run_id
        best_params = dict(params)

    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

results_df = pd.DataFrame(results).sort_values("retrain_val_r2", ascending=False)
print("\nSUMMARY (sorted by retrain_val_r2):")
display(results_df)

# Build final_model from best retrained run for Cell 12
print("\nCHOSEN BEST (by retrain_val_r2): run_id =", best_run_id, "val_r2 =", best_val_r2)
best = best_params

# Rebuild the winning architecture and load the snapshot taken inside the loop.
final_model = LSTMGRU(
    n_features=Xtr_seq.shape[-1],
    hidden_lstm=best["hidden_lstm"],
    hidden_gru=best["hidden_gru"],
    dense1=best["dense1"],
    dense2=best["dense2"],
    dropout=best["dropout"],
).to(device)

final_model.load_state_dict(best_state)


Top runs loaded: 10


Unnamed: 0,run_id,seed,val_r2,hidden_lstm,hidden_gru,dense1,dense2,dropout,lr,weight_decay,batch_size,grad_clip
0,397,439,0.117007,64,256,128,16,0.2,0.001,1e-06,64,1.0
1,76,118,0.099318,64,64,64,32,0.2,0.0005,0.0001,128,1.0
2,187,229,0.097808,64,128,32,64,0.2,0.0005,0.0001,64,1.0
3,235,277,0.092132,64,128,64,64,0.2,0.0005,0.0001,64,1.0
4,348,390,0.08318,64,256,64,16,0.2,0.0005,0.0001,128,1.0
5,368,410,0.076706,64,256,64,32,0.2,0.001,0.0001,128,1.0
6,169,211,0.075623,64,128,32,32,0.2,0.0005,1e-06,64,1.0
7,350,392,0.075362,64,256,64,16,0.2,0.001,1e-06,128,1.0
8,390,432,0.070169,64,256,128,16,0.0,0.001,1e-06,128,1.0
9,312,354,0.06175,64,256,32,32,0.0,0.001,0.0001,128,1.0



[1/10] run_id=397 seed=439
params: {'hidden_lstm': 64, 'hidden_gru': 256, 'dense1': 128, 'dense2': 16, 'dropout': 0.2, 'lr': 0.001, 'weight_decay': 1e-06, 'batch_size': 64, 'grad_clip': 1.0}
csv val_r2=0.117007 | retrain val_r2=0.117007 | retrain val_rmse=0.077059

[2/10] run_id=76 seed=118
params: {'hidden_lstm': 64, 'hidden_gru': 64, 'dense1': 64, 'dense2': 32, 'dropout': 0.2, 'lr': 0.0005, 'weight_decay': 0.0001, 'batch_size': 128, 'grad_clip': 1.0}
csv val_r2=0.099318 | retrain val_r2=0.099318 | retrain val_rmse=0.077827

[3/10] run_id=187 seed=229
params: {'hidden_lstm': 64, 'hidden_gru': 128, 'dense1': 32, 'dense2': 64, 'dropout': 0.2, 'lr': 0.0005, 'weight_decay': 0.0001, 'batch_size': 64, 'grad_clip': 1.0}
csv val_r2=0.097808 | retrain val_r2=0.097808 | retrain val_rmse=0.077892

[4/10] run_id=235 seed=277
params: {'hidden_lstm': 64, 'hidden_gru': 128, 'dense1': 64, 'dense2': 64, 'dropout': 0.2, 'lr': 0.0005, 'weight_decay': 0.0001, 'batch_size': 64, 'grad_clip': 1.0}
csv val_

Unnamed: 0,rank,run_id,seed,csv_val_r2,retrain_val_r2,retrain_val_rmse,hidden_lstm,hidden_gru,dense1,dense2,dropout,lr,weight_decay,batch_size,grad_clip
0,1,397,439,0.117007,0.117007,0.077059,64,256,128,16,0.2,0.001,1e-06,64,1.0
1,2,76,118,0.099318,0.099318,0.077827,64,64,64,32,0.2,0.0005,0.0001,128,1.0
2,3,187,229,0.097808,0.097808,0.077892,64,128,32,64,0.2,0.0005,0.0001,64,1.0
3,4,235,277,0.092132,0.092132,0.078137,64,128,64,64,0.2,0.0005,0.0001,64,1.0
4,5,348,390,0.08318,0.08318,0.078521,64,256,64,16,0.2,0.0005,0.0001,128,1.0
5,6,368,410,0.076706,0.076706,0.078798,64,256,64,32,0.2,0.001,0.0001,128,1.0
6,7,169,211,0.075623,0.075623,0.078844,64,128,32,32,0.2,0.0005,1e-06,64,1.0
7,8,350,392,0.075362,0.075362,0.078855,64,256,64,16,0.2,0.001,1e-06,128,1.0
8,9,390,432,0.070169,0.070169,0.079076,64,256,128,16,0.0,0.001,1e-06,128,1.0
9,10,312,354,0.06175,0.06175,0.079434,64,256,32,32,0.0,0.001,0.0001,128,1.0



CHOSEN BEST (by retrain_val_r2): run_id = 397 val_r2 = 0.11700719594955444


<All keys matched successfully>

## Evaluation of the Model

In [22]:
#cell 12: test evaluation + save metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def mda(y_true, y_pred):
    """Mean directional accuracy.

    Returns the fraction of samples for which the prediction and the true
    value agree on being strictly positive (a value of exactly 0 counts as
    "not positive").
    """
    actual_up = np.asarray(y_true) > 0
    predicted_up = np.asarray(y_pred) > 0
    return float((actual_up == predicted_up).mean())

def eval_metrics(y_true, y_pred):
    """Compute MAE, RMSE, R2 and MDA for a set of predictions.

    Returns:
        Dict with keys ``"MAE"``, ``"RMSE"``, ``"R2"`` and ``"MDA"``, each a
        Python float.
    """
    scores = {}
    scores["MAE"] = float(mean_absolute_error(y_true, y_pred))
    # RMSE is the square root of sklearn's mean squared error.
    scores["RMSE"] = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    scores["R2"] = float(r2_score(y_true, y_pred))
    scores["MDA"] = float(mda(y_true, y_pred))
    return scores

# Score the chosen model on the held-out test windows (order preserved: shuffle=False).
test_loader = make_loader(Xte_seq, yte_seq, batch_size=256, shuffle=False)
y_true_te, y_pred_te = predict_loader(final_model, test_loader)

metrics = eval_metrics(y_true_te, y_pred_te)
print(metrics)

out_path = PROC_DIR / "phase2_metrics.csv"
row = {"model": f"LSTM_GRU_PCA_GridSearch_run{best_run_id}"}
row.update(metrics)

# Append this run to the metrics history if one exists; otherwise start a new table.
new = pd.DataFrame([row])
if out_path.exists():
    prev = pd.read_csv(out_path)
    new = pd.concat([prev, new], ignore_index=True)

new.to_csv(out_path, index=False)
print("saved:", out_path)
display(new.tail(50))


{'MAE': 0.06309177726507187, 'RMSE': 0.07652710040284857, 'R2': -0.37250685691833496, 'MDA': 0.48863636363636365}
saved: /media/workhorse/Windows/MyWorkPlace/Projects/StockPredictor/data/processed/phase2_metrics.csv


Unnamed: 0,model,MAE,RMSE,R2,MDA
0,LSTM_GRU_PCA_Optuna,0.010902,0.016297,0.004003,0.530645
1,LSTM_GRU_PCA_Optuna,0.011765,0.017072,-0.093003,0.553226
2,LSTM_GRU_PCA_Optuna,0.010906,0.016367,-0.004573,0.543548
3,LSTM_GRU_PCA_Optuna,0.011357,0.016551,-0.0274,0.470968
4,LSTM_GRU_PCA_Optuna,0.011533,0.016739,-0.050865,0.485484
5,LSTM_GRU_PCA_Optuna,0.01183,0.016986,-0.082041,0.509677
6,SimpleLSTM_NoPCA_TargetScaled_Optuna,0.011583,0.017131,-0.100588,0.482258
7,LSTM_GRU_PCA_Optuna,0.014199,0.019408,-0.412698,0.543548
8,LSTM_GRU_PCA_Optuna,0.014824,0.019441,-0.417465,0.453226
9,LSTM_GRU_PCA_Optuna,0.012307,0.01753,-0.15243,0.506452


## Saving the model artifacts

In [23]:
#cell 13: save phase 2 artifacts
import joblib
import json

art_dir = PROC_DIR / "phase2_artifacts"
art_dir.mkdir(parents=True, exist_ok=True)

#save preprocessing objects
for fitted, filename in ((scaler, "scaler.joblib"), (pca, "pca.joblib")):
    joblib.dump(fitted, art_dir / filename)

#save model weights
torch.save(final_model.state_dict(), art_dir / "lstm_gru_pca_weights.pt")

#save grid best params
with open(art_dir / "grid_best_params.json", "w") as f:
    json.dump(best, f, indent=2)

# also save the full grid run log snapshot
grid_path = PROC_DIR / "phase2_gridsearch_runs.csv"
if grid_path.exists():
    pd.read_csv(grid_path).to_csv(art_dir / "gridsearch_runs.csv", index=False)

#save test predictions with dates aligned to test targets
# Test windows were built with one target per test row, so test dates line up with the predictions as-is.
test_dates = test_df["date"].values
test_dates_seq = test_dates

pred_df = pd.DataFrame(
    dict(
        date=pd.to_datetime(test_dates_seq),
        y_true=y_test.values.astype(float),
        y_pred=y_pred_te.astype(float),
    )
)

pred_df.to_csv(art_dir / "test_predictions.csv", index=False)

print("saved artifacts to:", art_dir)
display(pred_df.head())
display(pred_df.tail())


saved artifacts to: /media/workhorse/Windows/MyWorkPlace/Projects/StockPredictor/data/processed/phase2_artifacts


Unnamed: 0,date,y_true,y_pred
0,2023-05-11,0.042041,0.002842
1,2023-05-12,0.062991,7.1e-05
2,2023-05-15,0.063277,0.005354
3,2023-05-16,0.066763,0.007906
4,2023-05-17,0.074302,0.010474


Unnamed: 0,date,y_true,y_pred
611,2025-12-17,-0.061874,0.018012
612,2025-12-18,-0.098327,0.018318
613,2025-12-19,-0.099907,0.018826
614,2025-12-22,-0.087169,0.020319
615,2025-12-23,-0.093535,0.024743
