In [1]:
import os
import json
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm

# For metrics and splitting
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import ParameterGrid

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Choose the compute device once; models and tensors below are moved onto it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cpu


In [3]:
class MLP(nn.Module):
    """Feed-forward regressor with a single scalar output per sample.

    The network is `n_layers` repetitions of Linear -> ReLU (-> Dropout),
    followed by a final Linear layer that maps to one output unit.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of every hidden Linear layer.
        n_layers: number of hidden blocks; with 0 the network is just
            Linear(input_dim, 1).
        dropout: dropout probability; Dropout modules are only inserted
            when this is greater than 0.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, dropout=0.0):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_dim] + [hidden_dim] * n_layers
        with_dropout = dropout > 0

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
            if with_dropout:
                stack.append(nn.Dropout(dropout))
        # Regression head: one output unit.
        stack.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Map x of shape (batch_size, input_dim) to predictions of shape (batch_size, 1)."""
        return self.net(x)

In [4]:
class RNN(nn.Module):
    """Plain `nn.RNN` regressor: recurrent stack followed by a 1-unit Linear head.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the recurrent hidden state.
        n_layers: number of stacked recurrent layers.
        dropout: dropout rate handed to `nn.RNN`; it is forced to 0.0 when
            there is only a single layer.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, dropout=0.0):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Only forward the dropout rate when the stack has more than one layer.
        stacked_dropout = dropout if n_layers > 1 else 0.0
        self.rnn = nn.RNN(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map x of shape (batch_size, seq_len, input_dim) to (batch_size, 1)."""
        # Explicit all-zero initial hidden state on the same device as the input.
        initial_hidden = torch.zeros(
            self.n_layers, x.size(0), self.hidden_dim, device=x.device
        )
        sequence_out, _ = self.rnn(x, initial_hidden)
        # Only the last time step's hidden output feeds the regression head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [5]:
class LSTM(nn.Module):
    """`nn.LSTM` regressor: recurrent stack followed by a 1-unit Linear head.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the hidden and cell states.
        n_layers: number of stacked LSTM layers.
        dropout: dropout rate handed to `nn.LSTM`; it is forced to 0.0 when
            there is only a single layer.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, dropout=0.0):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Only forward the dropout rate when the stack has more than one layer.
        stacked_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map x of shape (batch_size, seq_len, input_dim) to (batch_size, 1)."""
        state_shape = (self.n_layers, x.size(0), self.hidden_dim)
        # Explicit all-zero initial hidden and cell states on the input's device.
        initial_hidden = torch.zeros(*state_shape, device=x.device)
        initial_cell = torch.zeros(*state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        # Only the last time step's hidden output feeds the regression head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [6]:
class GRU(nn.Module):
    """`nn.GRU` regressor: recurrent stack followed by a 1-unit Linear head.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the recurrent hidden state.
        n_layers: number of stacked GRU layers.
        dropout: dropout rate handed to `nn.GRU`; it is forced to 0.0 when
            there is only a single layer.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, dropout=0.0):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Only forward the dropout rate when the stack has more than one layer.
        stacked_dropout = dropout if n_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map x of shape (batch_size, seq_len, input_dim) to (batch_size, 1)."""
        # Explicit all-zero initial hidden state on the same device as the input.
        initial_hidden = torch.zeros(
            self.n_layers, x.size(0), self.hidden_dim, device=x.device
        )
        sequence_out, _ = self.gru(x, initial_hidden)
        # Only the last time step's hidden output feeds the regression head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [7]:
def build_dl_model(model_name, input_dim, params):
    """
    Instantiate one of the notebook's architectures and move it to `device`.

    Args:
        model_name: one of 'MLP', 'RNN', 'LSTM', 'GRU'.
        input_dim: number of input features per sample / time step.
        params: hyperparameter dict; the keys read here are
          - 'hidden_dim': hidden layer width (default 64)
          - 'n_layers': number of hidden / recurrent layers (default 2)
          - 'dropout': dropout rate (default 0.0)
          Any other keys are ignored by this function.

    Returns:
        The constructed nn.Module, already moved to the global `device`.

    Raises:
        ValueError: if `model_name` is not one of the four supported names.
    """
    hidden_dim = params.get("hidden_dim", 64)
    n_layers = params.get("n_layers", 2)
    dropout = params.get("dropout", 0.0)

    # Reject unknown names up front, before any architecture is touched.
    if model_name not in ("MLP", "RNN", "LSTM", "GRU"):
        raise ValueError(f"Unknown model type: {model_name}")

    architectures = {"MLP": MLP, "RNN": RNN, "LSTM": LSTM, "GRU": GRU}
    network = architectures[model_name](input_dim, hidden_dim, n_layers, dropout=dropout)
    return network.to(device)

In [8]:
def _make_optimizer(model, model_params):
    """Build the optimizer described by `model_params` for `model`'s parameters.

    Reads 'optimizer' (default "adam"), 'learning_rate' (default 1e-3) and
    'weight_decay' (default 0.0). Unrecognised optimizer names fall back to
    Adam, which is what this notebook always used.
    """
    lr = model_params.get("learning_rate", 1e-3)
    weight_decay = model_params.get("weight_decay", 0.0)
    name = str(model_params.get("optimizer", "adam")).lower()

    optimizer_classes = {
        "adam": optim.Adam,
        "adamw": optim.AdamW,
        "sgd": optim.SGD,
        "rmsprop": optim.RMSprop,
    }
    optimizer_cls = optimizer_classes.get(name, optim.Adam)
    return optimizer_cls(model.parameters(), lr=lr, weight_decay=weight_decay)


def sliding_window_forecast_with_torch(
    df,
    target_col,
    model_name,
    model_params,
    window_size=25,
    test_ratio=0.2,
    drop_cols=None,
    log_dir="model_dl_logs",
    log_filename="prediction_log.csv"
):
    """
    Rolls a window of size window_size through the data, one row at a time.
    For each window:
      - split chronologically: the first (1 - test_ratio) share of rows is
        the train part, the remaining rows are the test part
      - standard scale X and y, fitting the scalers on the train part only
      - train a freshly built PyTorch model with mini-batch MSE loss
      - predict the test part and inverse scale the predictions
      - log metrics (MSE, MAPE, R²) computed on the unscaled values

    Args:
        df: DataFrame holding the features and the target column; rows with
            NaN are dropped and the frame is sorted by index before use.
        target_col: name of the target column.
        model_name: 'MLP', 'RNN', 'LSTM' or 'GRU' (passed to build_dl_model).
            The three recurrent models receive each row as a length-1 sequence.
        model_params: hyperparameter dict. Besides the keys consumed by
            build_dl_model, this function reads 'learning_rate' (1e-3),
            'optimizer' ("adam"), 'weight_decay' (0.0), 'epochs' (20) and
            'batch_size' (32); defaults in parentheses.
        window_size: number of consecutive rows per window.
        test_ratio: share of each window held out for testing.
        drop_cols: extra columns to exclude from the features.
        log_dir: directory for the CSV log; created if missing.
        log_filename: CSV file name inside `log_dir`.

    Returns:
        DataFrame with the cumulative contents of the log file: rows that
        were already on disk followed by the rows produced by this call.
        Nothing is written when this call produced no rows.
    """
    if drop_cols is None:
        drop_cols = []
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(log_dir, log_filename)
    log_entries = []

    df = df.dropna().sort_index()
    n = len(df)

    # Loop-invariant settings, hoisted out of the per-window loop.
    is_recurrent = model_name in ["RNN", "LSTM", "GRU"]
    epochs = model_params.get("epochs", 20)
    batch_size = model_params.get("batch_size", 32)
    criterion = nn.MSELoss()
    excluded_cols = [target_col] + list(drop_cols)

    for start in range(0, n - window_size + 1):
        window_df = df.iloc[start:start + window_size].copy()
        X_window = window_df.drop(columns=excluded_cols, errors='ignore')
        y_window = window_df[target_col].values

        train_size = int(len(X_window) * (1 - test_ratio))
        # Both parts must be non-empty, otherwise the window is unusable.
        if train_size < 1 or train_size >= len(X_window):
            continue

        X_train = X_window.iloc[:train_size].values
        X_test  = X_window.iloc[train_size:].values
        y_train = y_window[:train_size]
        y_test  = y_window[train_size:]

        # Scale features (statistics from the train part only)
        scaler_X = StandardScaler()
        X_train_scaled = scaler_X.fit_transform(X_train)
        X_test_scaled  = scaler_X.transform(X_test)

        # Scale target (statistics from the train part only)
        scaler_y = StandardScaler()
        y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

        # Format for model: recurrent nets get a sequence axis of length 1
        input_dim = X_train_scaled.shape[1]
        if is_recurrent:
            X_train_scaled = np.expand_dims(X_train_scaled, axis=1)
            X_test_scaled = np.expand_dims(X_test_scaled, axis=1)

        X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
        y_train_t = torch.tensor(y_train_scaled, dtype=torch.float32).view(-1, 1).to(device)
        X_test_t = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)

        # Build and train a fresh model for this window
        model = build_dl_model(model_name, input_dim, model_params)
        # weight_decay and the optimizer name were previously ignored here.
        opt = _make_optimizer(model, model_params)

        dataset_size = X_train_t.shape[0]
        model.train()
        for _ in range(epochs):
            # New shuffle every epoch; index through the permutation instead
            # of physically reordering the training tensors.
            perm = torch.randperm(dataset_size, device=X_train_t.device)
            for batch_start in range(0, dataset_size, batch_size):
                batch_idx = perm[batch_start:batch_start + batch_size]

                opt.zero_grad()
                preds = model(X_train_t[batch_idx])
                loss = criterion(preds, y_train_t[batch_idx])
                loss.backward()
                opt.step()

        # Inference
        model.eval()
        with torch.no_grad():
            preds_test = model(X_test_t)
        preds_test = preds_test.cpu().numpy().ravel()

        # Inverse transform predictions back to the target's original scale
        preds_test = scaler_y.inverse_transform(preds_test.reshape(-1, 1)).ravel()
        # Use the original targets directly rather than a float32
        # scale/unscale round trip, which only added rounding error.
        y_true_test = np.asarray(y_test, dtype=float)

        # Guard against invalid output
        if not np.all(np.isfinite(preds_test)):
            print("⚠️ Skipping window due to NaN/inf in predictions.")
            continue

        mse = mean_squared_error(y_true_test, preds_test)
        mape = mean_absolute_percentage_error(y_true_test, preds_test)
        r2 = r2_score(y_true_test, preds_test)

        log_entry = {
            "model_name": model_name,
            "model_hyperparameters_dict": json.dumps(model_params),
            "window_size": window_size,
            "test_ratio": test_ratio,
            "start_date": str(df.index[start]),
            "end_date": str(df.index[start + window_size - 1]),
            "test_data_values_list": y_true_test.tolist(),
            "test_data_model_predictions_list": preds_test.tolist(),
            "MSE_score": mse,
            "MAPE_score": mape,
            "R^2_score": r2
        }
        log_entries.append(log_entry)

    new_log_df = pd.DataFrame(log_entries)

    existing = None
    if os.path.exists(log_path):
        try:
            existing = pd.read_csv(log_path)
        except pd.errors.EmptyDataError:
            # A header-less / zero-byte file left by an earlier run; ignore it.
            existing = None

    if new_log_df.empty:
        # Never write a column-less CSV: it would make every later
        # pd.read_csv on this path raise EmptyDataError.
        return existing if existing is not None else new_log_df

    if existing is not None and not existing.empty:
        log_df = pd.concat([existing, new_log_df], ignore_index=True)
    else:
        log_df = new_log_df
    log_df.to_csv(log_path, index=False)
    return log_df

In [9]:
def tune_torch_model(
    df,
    target_col,
    model_type,
    param_grid,
    window_sizes=(25, 50),
    test_ratio=0.2,
    drop_cols=None,
    log_dir="model_dl_logs"
):
    """
    Grid-search hyperparameters for one model type over one or more window sizes.

    For every (window size, hyperparameter set) pair this runs
    sliding_window_forecast_with_torch, which appends per-window results to
    "{model_type}_window_{w}.csv" in `log_dir`, then averages the MSE / MAPE /
    R² of that configuration.

    Args:
        df: DataFrame with features and target.
        target_col: name of the target column.
        model_type: 'MLP', 'RNN', 'LSTM' or 'GRU'.
        param_grid: dict of hyperparameter lists, expanded with ParameterGrid.
        window_sizes: iterable of window sizes to evaluate.
        test_ratio: share of each window held out for testing.
        drop_cols: extra columns to exclude from the features.
        log_dir: directory that receives the per-window CSV logs.

    Returns:
        DataFrame with one row per evaluated configuration (columns
        model_name, model_hyperparameters_dict, window_size, test_ratio,
        avg_MSE, avg_MAPE, avg_R^2); empty if nothing produced results.
    """
    if drop_cols is None:
        drop_cols = []
    window_sizes = list(window_sizes)

    param_list = list(ParameterGrid(param_grid))
    total_iterations = len(param_list) * len(window_sizes)

    summaries = []
    # Context manager closes the bar even if a run raises or is interrupted.
    with tqdm(total=total_iterations, desc=f"Tuning {model_type}") as pbar:
        for w in window_sizes:
            for params in param_list:
                log_df = sliding_window_forecast_with_torch(
                    df=df,
                    target_col=target_col,
                    model_name=model_type,
                    model_params=params,
                    window_size=w,
                    test_ratio=test_ratio,
                    drop_cols=drop_cols,
                    log_dir=log_dir,
                    log_filename=f"{model_type}_window_{w}.csv"
                )

                pbar.update(1)
                if log_df.empty:
                    continue

                # The returned frame is the cumulative log file, which is
                # shared by every hyperparameter set for this model/window.
                # Keep only the rows of the current configuration so the
                # averages are not polluted by other configurations.
                params_json = json.dumps(params)
                is_current_run = (
                    (log_df["model_name"] == model_type)
                    & (log_df["model_hyperparameters_dict"] == params_json)
                    & (log_df["window_size"] == w)
                    & np.isclose(log_df["test_ratio"].astype(float), test_ratio)
                )
                run_df = log_df[is_current_run]
                if run_df.empty:
                    continue

                summaries.append({
                    "model_name": model_type,
                    "model_hyperparameters_dict": params_json,
                    "window_size": w,
                    "test_ratio": test_ratio,
                    "avg_MSE": run_df["MSE_score"].mean(),
                    "avg_MAPE": run_df["MAPE_score"].mean(),
                    "avg_R^2": run_df["R^2_score"].mean()
                })

    # pd.DataFrame([]) is an empty frame, so no special case is needed.
    return pd.DataFrame(summaries)

In [10]:
def combine_and_top_logs(log_dir="model_dl_logs", tops=5):
    """
    Reads all CSV files in the given log directory,
    groups by model_name and hyperparameters (averaging MSE, MAPE and R²
    over all logged windows), and picks the top "tops" configurations by
    mean MAPE ascending for each model.

    Args:
        log_dir: directory containing the per-window prediction logs.
        tops: number of configurations to keep per model.

    Returns:
        DataFrame with columns model_name, model_hyperparameters_dict,
        MSE_score, MAPE_score, R^2_score. An empty DataFrame is returned
        (after printing a notice) when the directory is missing or holds no
        readable, non-empty CSV logs.
    """
    no_logs_message = f"No CSV log files in {log_dir} directory."
    if not os.path.isdir(log_dir):
        print(no_logs_message)
        return pd.DataFrame()

    # Sorted so the result does not depend on the OS's directory order.
    csv_names = sorted(f for f in os.listdir(log_dir) if f.endswith(".csv"))

    frames = []
    for csv_name in csv_names:
        try:
            frame = pd.read_csv(os.path.join(log_dir, csv_name))
        except pd.errors.EmptyDataError:
            # Zero-byte / header-less file (e.g. from an aborted run): skip it.
            continue
        if not frame.empty:
            frames.append(frame)

    if not frames:
        print(no_logs_message)
        return pd.DataFrame()

    combined_logs = pd.concat(frames, ignore_index=True)

    grouped_summary = combined_logs.groupby(
        ["model_name", "model_hyperparameters_dict"]
    )[["MSE_score", "MAPE_score", "R^2_score"]].mean().reset_index()

    # sort=False keeps the models in their order of appearance in the summary.
    top_n_list = [
        sub.sort_values("MAPE_score", ascending=True).head(tops)
        for _, sub in grouped_summary.groupby("model_name", sort=False)
    ]
    if not top_n_list:
        # pd.concat raises on an empty list; return the (empty) summary as is.
        return grouped_summary
    return pd.concat(top_n_list, ignore_index=True)

In [11]:
# Hyperparameter search spaces, one grid per architecture. The grids differ
# only in the hidden sizes tried; every other axis is the same for all models.
_hidden_dim_options = {
    "MLP":  [32, 64, 128],
    "RNN":  [32, 64, 128],
    "LSTM": [64, 128, 256],
    "GRU":  [64, 128, 256],
}

# The list literals sit inside the comprehension so each model gets its own
# list objects (no aliasing between grids).
param_grids_torch = {
    architecture: {
        "hidden_dim":    hidden_dims,
        "n_layers":      [1, 2, 3],
        "dropout":       [0.0, 0.2, 0.4],        # Regularization
        "learning_rate": [1e-2, 1e-3, 1e-4],     # Wider range
        "optimizer":     ["adam"],
        "epochs":        [25, 50, 100],          # Reasonable ranges
        "batch_size":    [16, 32, 64],
        "weight_decay":  [0.0, 1e-4, 1e-3],      # L2 regularization
    }
    for architecture, hidden_dims in _hidden_dim_options.items()
}

In [12]:
# Load the pre-built quarterly dataset: a dict keyed by ticker symbol.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this project's own pipeline.
with open("temp_output/quarterly_X_y.pkl", "rb") as f:
    data_q = pickle.load(f)

# No extra feature columns are excluded; passed as drop_cols to the tuning calls.
drop_columns = []
print(data_q.keys())  # Lists the available tickers (AAPL, MSFT, ...).
# Only AAPL is tuned below. The variable name is misspelled ("quaterly") but
# is referenced under this spelling by the quarterly tuning cell.
quaterly_data = data_q["AAPL"]

dict_keys(['AAPL', 'MSFT', 'LLY', 'UNH', 'V', 'MA', 'GOOGL', 'META', 'AMZN', 'TSLA', 'PG', 'WMT', 'RTX', 'UNP', 'XOM', 'CVX', 'LIN', 'SHW', 'AMT', 'PLD', 'NEE', 'SO'])


In [None]:
# Settings shared by all four quarterly tuning runs; only the architecture
# and its hyperparameter grid change from call to call.
shared_tuning_args_q = dict(
    df=quaterly_data,
    target_col="y",
    window_sizes=[10],  # or [25,50]
    test_ratio=0.2,
    drop_cols=drop_columns,
    log_dir="model_dl_logs_q",
)

# MLP on quarterly
mlp_summary_q = tune_torch_model(
    model_type="MLP", param_grid=param_grids_torch["MLP"], **shared_tuning_args_q
)

# RNN on quarterly
rnn_summary_q = tune_torch_model(
    model_type="RNN", param_grid=param_grids_torch["RNN"], **shared_tuning_args_q
)

# LSTM on quarterly
lstm_summary_q = tune_torch_model(
    model_type="LSTM", param_grid=param_grids_torch["LSTM"], **shared_tuning_args_q
)

# GRU on quarterly
gru_summary_q = tune_torch_model(
    model_type="GRU", param_grid=param_grids_torch["GRU"], **shared_tuning_args_q
)





[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



Tuning RNN:   0%|          | 8/2187 [00:59<4:29:30,  7.42s/it]
Tuning MLP:   4%|▍         | 83/2187 [02:29<1:03:00,  1.80s/it]
Tuning RNN:   0%|          | 9/2187 [00:55<3:41:50,  6.11s/it]
Tuning RNN:   1%|          | 11/2187 [00:48<2:39:34,  4.40s/it]


KeyboardInterrupt: 

In [None]:
# Merge the per-architecture quarterly tuning summaries into one table.
# The previous version concatenated rf/svm/gb/xgb summaries, which are never
# defined in this notebook and raised NameError on a fresh kernel.
combined_summary_q = pd.concat(
    [mlp_summary_q, rnn_summary_q, lstm_summary_q, gru_summary_q],
    ignore_index=True,
)
print("Combined Tuning Summary:")
display(combined_summary_q)

# Alternatively, read all prediction log files from log directory
top5_summary_q = combine_and_top_logs(log_dir="model_dl_logs_q")
print("Top 5 Configurations per Model:")
display(top5_summary_q)

In [None]:
# Load the pre-built daily dataset from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this project's own pipeline.
with open("temp_output/daily_X_y.pkl", "rb") as f:
    data_d = pickle.load(f)
    
# No extra feature columns are excluded; passed as drop_cols to the tuning calls.
drop_columns = []
print(data_d.keys())  # Presumably lists the tickers, like the quarterly dict; verify.
daily_data = data_d["AAPL"]
# Keep only the last 128 rows to bound the number of sliding windows.
daily_data = daily_data.iloc[-128:,:]
daily_data  # displayed as the cell's output

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume', '^GSPC', '^IXIC',
       '^DJI', '^VIX', 'CL=F', 'GC=F', 'SI=F', '^TNX', 'DX-Y.NYB',
       'FedFundsRate', 'SMA5', 'SMA50', 'SMA200', 'MACDLine', 'MACDSignal',
       'MACDHist', 'RSI14', 'BBupper', 'BBlower', 'ROC12', 'PPO', 'MOM5',
       'StochK', 'StochD', 'WillR', 'AccDist', 'PlusDI14', 'TR', 'SlowStochD',
       'ChaikinOsc', 'ADX14', 'ATR14', 'y'],
      dtype='object')


In [None]:
# Settings shared by all four daily tuning runs; only the architecture and
# its hyperparameter grid change from call to call.
shared_tuning_args_d = dict(
    df=daily_data,
    target_col="y",
    window_sizes=[25],
    test_ratio=0.2,
    drop_cols=drop_columns,
    log_dir="model_dl_logs_d",
)

# MLP
mlp_summary_d = tune_torch_model(
    model_type="MLP", param_grid=param_grids_torch["MLP"], **shared_tuning_args_d
)

# RNN
rnn_summary_d = tune_torch_model(
    model_type="RNN", param_grid=param_grids_torch["RNN"], **shared_tuning_args_d
)

# LSTM
lstm_summary_d = tune_torch_model(
    model_type="LSTM", param_grid=param_grids_torch["LSTM"], **shared_tuning_args_d
)

# GRU
gru_summary_d = tune_torch_model(
    model_type="GRU", param_grid=param_grids_torch["GRU"], **shared_tuning_args_d
)

Tuning RF: 100%|██████████| 48/48 [34:52<00:00, 43.59s/it]
Tuning SVM: 100%|██████████| 798/798 [03:36<00:00,  3.68it/s]
Tuning GB: 100%|██████████| 50/50 [51:04<00:00, 61.29s/it] 
Tuning XGB: 100%|██████████| 450/450 [58:51:49<00:00, 470.91s/it]   


In [None]:
# Merge the per-architecture daily tuning summaries into one table.
# The previous version concatenated rf/svm/gb/xgb summaries, which are never
# defined in this notebook and raised NameError on a fresh kernel.
combined_summary_d = pd.concat(
    [mlp_summary_d, rnn_summary_d, lstm_summary_d, gru_summary_d],
    ignore_index=True,
)
print("Combined Tuning Summary:")
display(combined_summary_d)

# Alternatively, read all prediction log files from log directory
top5_summary_d = combine_and_top_logs(log_dir="model_dl_logs_d")
print("Top 5 Configurations per Model:")
display(top5_summary_d)

In [None]:
# Print the hyperparameter dicts of the 25 best configurations per model
# (ranked by mean MAPE), first for the quarterly logs, then for the daily logs.
top_summary_q = combine_and_top_logs(log_dir="model_dl_logs_q", tops=25)
for params_json in top_summary_q['model_hyperparameters_dict']:
    print(json.loads(params_json))

# Visual divider between the quarterly and daily listings.
divider = "=" * 100
print("\n" + divider + "\n")

top_summary_d = combine_and_top_logs(log_dir="model_dl_logs_d", tops=25)
for params_json in top_summary_d['model_hyperparameters_dict']:
    print(json.loads(params_json))

{'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 8000}
{'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 6400}
{'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 4800}
{'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 3200}
{'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 1600}
{'learning_rate': 0.04, 'max_depth': 2, 'n_estimators': 8000}
{'learning_rate': 0.04, 'max_depth': 2, 'n_estimators': 6400}
{'learning_rate': 0.04, 'max_depth': 2, 'n_estimators': 4800}
{'learning_rate': 0.04, 'max_depth': 2, 'n_estimators': 3200}
{'learning_rate': 0.04, 'max_depth': 2, 'n_estimators': 1600}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 4800}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 6400}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 8000}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 3200}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 1600}
{'learning_rate': 0.05, 'max_depth': 2, 'n_estimators': 1600}
{'learni