# GARCH LSTM Forecasting

I will be making a GARCH-LSTM hybrid model to forecast S&P 500 returns using S&P 500 data and VIX downloaded from the yfinance package.

I will get both of these data from yahoo finance.

### Importing libraries and packages

In [20]:
import numpy as np
import pandas as pd

import yfinance

import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import r2_score, mean_squared_error

from arch import arch_model

### Datasets

In [21]:
start_date = '2005-01-01'
end_date = '2025-01-01'

vix = yfinance.download('^VIX', start=start_date, end=end_date, interval='1d', auto_adjust=False)
vix.columns = ['Adj_Close', 'Close', 'High', 'Low', 'Open', 'Volume']
vix.drop(columns='Volume')
print(vix.isna().sum().sum())
print(vix.head())
sp500 = yfinance.download('^GSPC', start=start_date, end=end_date, interval='1d', auto_adjust=False)
sp500.columns = ['Adj_Close', 'Close', 'High', 'Low', 'Open', 'Volume']

sp500.head()



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

0
            Adj_Close  Close   High    Low   Open  Volume
Date                                                     
2005-01-03      14.08  14.08  14.23  13.25  13.39       0
2005-01-04      13.98  13.98  14.45  13.93  14.01       0
2005-01-05      14.09  14.09  14.09  13.26  13.98       0
2005-01-06      13.58  13.58  14.09  13.33  14.09       0
2005-01-07      13.49  13.49  13.51  12.94  13.47       0





Unnamed: 0_level_0,Adj_Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-03,1202.079956,1202.079956,1217.800049,1200.319946,1211.920044,1510800000
2005-01-04,1188.050049,1188.050049,1205.839966,1185.390015,1202.079956,1721000000
2005-01-05,1183.73999,1183.73999,1192.72998,1183.719971,1188.050049,1738900000
2005-01-06,1187.890015,1187.890015,1191.630005,1183.27002,1183.73999,1569100000
2005-01-07,1186.189941,1186.189941,1192.199951,1182.160034,1187.890015,1477900000


### Model initialization

## Hybrid GARCH-LSTM Model
This model combines GARCH (baseline volatility) and LSTM (nonlinear corrections using GARCH residuals and VIX).

In [22]:
# to be implemented, somehow, (I'll try T_T)

LSTM model: (below is from a previous LSTM model I made)

In [None]:
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # Take last output
        out = self.fc(out)
        return out

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = LSTMModel().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)


Training

In [None]:
EPOCHS = 100
train_losses = []
for epoch in range(1, EPOCHS+1):
    model.train()
    epoch_loss = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        xb = xb.view(xb.size(0), xb.size(1), 1)  # (batch, seq, 1)

        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        epoch_loss = epoch_loss + loss.item()*xb.size(0)
    
    avg_loss = epoch_loss / len(train_loader.dataset)
    train_losses.append(avg_loss)
    print(f"Epoch {epoch}/{EPOCHS}, Training Loss: {avg_loss:.6f}")

Eval

In [None]:
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    X_test_tensor = X_test_tensor.view(X_test_tensor.size(0), X_test_tensor.size(1), 1)
    preds = model(X_test_tensor).cpu().numpy()
    preds_inv = scaler.inverse_transform(preds)
    y_test_inv = scaler.inverse_transform(y_test)

r2 = r2_score(y_test_inv, preds_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, preds_inv))
print(f"R^2: {r2:.4f}")
print(f"RMSE: {rmse:.4f}")