In [29]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import tensorflow as tf
import torch.nn as nn
import torch.optim as optim 
from sklearn.metrics import mean_squared_error, r2_score



In [2]:
# Define the tickers for the financial assets
tickers = {
    'S&P 500': '^GSPC',
    'FTSE 100': '^FTSE',
    'Nikkei 225': '^N225',
    'Gold ETF': 'GLD',  
    'US Treasury Bonds': 'TLT' 
}

# Define the time period for the data
start_date = '2010-01-01'
end_date = '2020-12-31'

# Download data for each ticker
data = {}
for asset, ticker in tickers.items():
    data[asset] = yf.download(ticker, start=start_date, end=end_date)

  data[asset] = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data[asset] = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data[asset] = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data[asset] = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data[asset] = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


In [3]:
data

{'S&P 500': Price             Close         High          Low         Open      Volume
 Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
 Date                                                                      
 2010-01-04  1132.989990  1133.869995  1116.560059  1116.560059  3991400000
 2010-01-05  1136.520020  1136.630005  1129.660034  1132.660034  2491020000
 2010-01-06  1137.140015  1139.189941  1133.949951  1135.709961  4972660000
 2010-01-07  1141.689941  1142.459961  1131.319946  1136.270020  5270680000
 2010-01-08  1144.979980  1145.390015  1136.219971  1140.520020  4389590000
 ...                 ...          ...          ...          ...         ...
 2020-12-23  3690.010010  3711.239990  3689.280029  3693.419922  3779160000
 2020-12-24  3703.060059  3703.820068  3689.320068  3694.030029  1883780000
 2020-12-28  3735.360107  3740.510010  3723.030029  3723.030029  3535460000
 2020-12-29  3727.040039  3756.120117  3723.310059  3750.010010  3393290000
 

In [4]:
# Function to calculate log returns
def calculate_log_returns(df):
    df['Log Return'] = df['Close'].pct_change().apply(lambda x: np.log(1 + x))
    return df

# Calculate log returns for each asset
for asset in data:
    data[asset] = calculate_log_returns(data[asset])

print(data['S&P 500'].tail())

Price             Close         High          Low         Open      Volume  \
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC   
Date                                                                         
2020-12-23  3690.010010  3711.239990  3689.280029  3693.419922  3779160000   
2020-12-24  3703.060059  3703.820068  3689.320068  3694.030029  1883780000   
2020-12-28  3735.360107  3740.510010  3723.030029  3723.030029  3535460000   
2020-12-29  3727.040039  3756.120117  3723.310059  3750.010010  3393290000   
2020-12-30  3732.040039  3744.629883  3730.209961  3736.189941  3154850000   

Price      Log Return  
Ticker                 
Date                   
2020-12-23   0.000746  
2020-12-24   0.003530  
2020-12-28   0.008685  
2020-12-29  -0.002230  
2020-12-30   0.001341  


In [5]:
# Add 5-day and 21-day moving averages for each asset
for asset, df in data.items():
    df['5-day MA'] = df['Close'].rolling(window=5).mean()
    df['21-day MA'] = df['Close'].rolling(window=21).mean()


print(data['S&P 500'].tail())


Price             Close         High          Low         Open      Volume  \
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC   
Date                                                                         
2020-12-23  3690.010010  3711.239990  3689.280029  3693.419922  3779160000   
2020-12-24  3703.060059  3703.820068  3689.320068  3694.030029  1883780000   
2020-12-28  3735.360107  3740.510010  3723.030029  3723.030029  3535460000   
2020-12-29  3727.040039  3756.120117  3723.310059  3750.010010  3393290000   
2020-12-30  3732.040039  3744.629883  3730.209961  3736.189941  3154850000   

Price      Log Return     5-day MA    21-day MA  
Ticker                                           
Date                                             
2020-12-23   0.000746  3700.815967  3674.680466  
2020-12-24   0.003530  3696.931982  3677.901902  
2020-12-28   0.008685  3702.122021  3682.935721  
2020-12-29  -0.002230  3708.546045  3687.159052  
2020-12-30   0.001341  37

In [6]:
# Add 21-day rolling volatility for each asset
for asset, df in data.items():
    df['Volatility'] = df['Log Return'].rolling(window=21).std()


print(data['S&P 500'].tail())


Price             Close         High          Low         Open      Volume  \
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC   
Date                                                                         
2020-12-23  3690.010010  3711.239990  3689.280029  3693.419922  3779160000   
2020-12-24  3703.060059  3703.820068  3689.320068  3694.030029  1883780000   
2020-12-28  3735.360107  3740.510010  3723.030029  3723.030029  3535460000   
2020-12-29  3727.040039  3756.120117  3723.310059  3750.010010  3393290000   
2020-12-30  3732.040039  3744.629883  3730.209961  3736.189941  3154850000   

Price      Log Return     5-day MA    21-day MA Volatility  
Ticker                                                      
Date                                                        
2020-12-23   0.000746  3700.815967  3674.680466   0.006239  
2020-12-24   0.003530  3696.931982  3677.901902   0.005307  
2020-12-28   0.008685  3702.122021  3682.935721   0.005537  
2020-12-2

In [7]:
# Drop rows with missing values 
for asset in data:
    data[asset].dropna(inplace=True)


print(data['S&P 500'].tail())


Price             Close         High          Low         Open      Volume  \
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC   
Date                                                                         
2020-12-23  3690.010010  3711.239990  3689.280029  3693.419922  3779160000   
2020-12-24  3703.060059  3703.820068  3689.320068  3694.030029  1883780000   
2020-12-28  3735.360107  3740.510010  3723.030029  3723.030029  3535460000   
2020-12-29  3727.040039  3756.120117  3723.310059  3750.010010  3393290000   
2020-12-30  3732.040039  3744.629883  3730.209961  3736.189941  3154850000   

Price      Log Return     5-day MA    21-day MA Volatility  
Ticker                                                      
Date                                                        
2020-12-23   0.000746  3700.815967  3674.680466   0.006239  
2020-12-24   0.003530  3696.931982  3677.901902   0.005307  
2020-12-28   0.008685  3702.122021  3682.935721   0.005537  
2020-12-2

In [8]:
# Combine data for all assets into a single DataFrame
merged_data = pd.DataFrame()

for asset, df in data.items():
    asset_data = df[['Log Return', '5-day MA', '21-day MA', 'Volatility']]
    asset_data.columns = [f'{asset} Log Return', f'{asset} 5-day MA', f'{asset} 21-day MA', f'{asset} Volatility']
    merged_data = pd.concat([merged_data, asset_data], axis=1)


print(merged_data.tail())


            S&P 500 Log Return  S&P 500 5-day MA  S&P 500 21-day MA  \
Date                                                                  
2020-12-24            0.003530       3696.931982        3677.901902   
2020-12-25                 NaN               NaN                NaN   
2020-12-28            0.008685       3702.122021        3682.935721   
2020-12-29           -0.002230       3708.546045        3687.159052   
2020-12-30            0.001341       3717.502051        3692.416678   

            S&P 500 Volatility  FTSE 100 Log Return  FTSE 100 5-day MA  \
Date                                                                     
2020-12-24            0.005307             0.000969        6498.900000   
2020-12-25                 NaN                  NaN                NaN   
2020-12-28            0.005537                  NaN                NaN   
2020-12-29            0.005586             0.015353        6509.220020   
2020-12-30            0.005428            -0.007129       

In [9]:
for asset in data:
    merged_data.dropna(inplace=True)

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

# Drop the last row from both the features and target to align their lengths
features = merged_data.values[:-1]  # Drop the last row of features to match target length
target = merged_data['S&P 500 Log Return'].shift(-1).dropna().values  # Shift target and drop NaN values

# Ensure that both features and target arrays have the same length after dropping the last row
assert len(features) == len(target), f"Features and target length mismatch: {len(features)} != {len(target)}"

# Normalize the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Split data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, shuffle=False)

# Reshape the data for LSTM input (samples, timesteps, features)
X_train_lstm = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Check the shape of the data
print(X_train_lstm.shape, X_test_lstm.shape)


(2028, 1, 20) (508, 1, 20)


In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Build the LSTM model
model_lstm = Sequential()

# Add LSTM layer (units = 50, return_sequences=False to output only the final time step)
model_lstm.add(LSTM(units=50, return_sequences=False, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))

# Add Dense layer for prediction
model_lstm.add(Dense(units=1))  # Predicting a single value (next day's return)

# Compile the model
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train the LSTM model
history_lstm = model_lstm.fit(X_train_lstm, y_train, epochs=10, batch_size=32, validation_data=(X_test_lstm, y_test))

# Evaluate the model on the test set
lstm_loss = model_lstm.evaluate(X_test_lstm, y_test)
print("LSTM Model Loss:", lstm_loss)

y_pred_lstm = model_lstm.predict(X_test_lstm)


  super().__init__(**kwargs)


Epoch 1/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 0.0028 - val_loss: 0.0017
Epoch 2/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.7163e-04 - val_loss: 0.0015
Epoch 3/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.7007e-04 - val_loss: 0.0010
Epoch 4/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.4634e-04 - val_loss: 8.3306e-04
Epoch 5/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.5494e-04 - val_loss: 6.5558e-04
Epoch 6/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.2904e-04 - val_loss: 6.3838e-04
Epoch 7/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.2476e-04 - val_loss: 5.7146e-04
Epoch 8/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.3518e-04 - val_loss: 6.0804e-04
Epoch 9

In [19]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Reshape the input to match (seq_len, batch_size, input_dim)
# Since you have a single timestep (sequence length = 1), you can reshape to (1, batch_size, input_dim)
X_train_tensor = X_train_tensor.unsqueeze(0)  # Adding sequence length dimension (1)
X_test_tensor = X_test_tensor.unsqueeze(0)  # Adding sequence length dimension (1)

# Check the new shapes
print(X_train_tensor.shape, X_test_tensor.shape)  # Should be (1, batch_size, input_dim)
print(y_train_tensor.shape, y_test_tensor.shape)

torch.Size([1, 2028, 20]) torch.Size([1, 508, 20])
torch.Size([2028]) torch.Size([508])


In [20]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, output_dim):
        super(TransformerModel, self).__init__()
        
        # Transformer block (No target input, only source)
        self.transformer = nn.Transformer(d_model=input_dim, nhead=num_heads, num_encoder_layers=num_layers)
        
        # Fully connected layer for output
        self.fc = nn.Linear(input_dim, output_dim)
    
    def forward(self, x):
        # x is of shape (batch_size, seq_len, input_dim)
        x = x.permute(1, 0, 2)  # Change shape to (seq_len, batch_size, input_dim) for Transformer
        
        # Apply the Transformer
        # Since we don't need a target (tgt), we will use the same input `x` as the input to the Transformer
        x = self.transformer(x, x)  # Pass the same tensor as both source and target
        
        # Pooling across the sequence length (mean pooling)
        x = x.mean(dim=0)  # Pooling across the sequence length
        x = self.fc(x)
        return x


# Initialize model
model_transformer = TransformerModel(input_dim=X_train.shape[1], num_heads=4, num_layers=2, output_dim=1)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model_transformer.parameters(), lr=0.001)


In [21]:
# Training the model
epochs = 10
for epoch in range(epochs):
    model_transformer.train()
    
    # Forward pass
    optimizer.zero_grad()
    y_pred = model_transformer(X_train_tensor)
    
    # Compute loss
    loss = criterion(y_pred, y_train_tensor.view(-1, 1))
    
    # Backward pass
    loss.backward()
    optimizer.step()
    
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')

# Evaluate the Transformer model on test data
model_transformer.eval()
with torch.no_grad():
    y_pred_transformer = model_transformer(X_test_tensor)
    transformer_loss = criterion(y_pred_transformer, y_test_tensor.view(-1, 1))

print("Transformer Model Loss:", transformer_loss.item())


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/10], Loss: 0.014851425774395466
Epoch [2/10], Loss: 2.1736502647399902
Epoch [3/10], Loss: 0.4221070110797882
Epoch [4/10], Loss: 0.0003210810536984354
Epoch [5/10], Loss: 0.11240652203559875
Epoch [6/10], Loss: 0.15187445282936096
Epoch [7/10], Loss: 0.09505989402532578
Epoch [8/10], Loss: 0.03072057105600834
Epoch [9/10], Loss: 0.0013898792676627636
Epoch [10/10], Loss: 0.008257371373474598
Transformer Model Loss: 0.04923277720808983


  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# LSTM Evaluation
mse_lstm = mean_squared_error(y_test, y_pred_lstm)
r2_lstm = r2_score(y_test, y_pred_lstm)
print(f"LSTM MSE: {mse_lstm}, R^2: {r2_lstm}")

# Transformer Evaluation
mse_transformer = mean_squared_error(y_test, y_pred_transformer.cpu().detach().numpy().flatten())
r2_transformer = r2_score(y_test, y_pred_transformer.numpy())
print(f"Transformer MSE: {mse_transformer}, R^2: {r2_transformer}")


LSTM MSE: 0.0005823844981462914, R^2: -1.1372094351232964


ValueError: Found input variables with inconsistent numbers of samples: [508, 1]

Removing Overfitting

Method 1: With Hyperparameters  hidden_dim=64

In [42]:
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, dropout=0.2):
        super(LSTMModel, self).__init__()
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, 1)  # Output layer
        
    def forward(self, x):
        # x: (batch_size, seq_len, input_dim)
        lstm_out, (hn, cn) = self.lstm(x)
        # We only want the output of the last timestep
        out = self.fc(lstm_out[:, -1, :])
        return out

In [None]:
# Hyperparameters
input_dim = X_train.shape[1]  # Number of features
hidden_dim = 64  # Number of LSTM hidden units
num_layers = 2  # Number of LSTM layers
dropout = 0.3  # Dropout rate to prevent overfitting
batch_size = 64
learning_rate = 0.001
epochs = 50

# Prepare the data
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Reshape for LSTM input (batch_size, seq_len, input_dim)
X_train_tensor = X_train_tensor.unsqueeze(1)  
X_test_tensor = X_test_tensor.unsqueeze(1)

# Initialize the model
model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop with early stopping
best_loss = float('inf')
patience = 5  # Number of epochs to wait before stopping if no improvement
patience_counter = 0

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    y_pred = model(X_train_tensor)
    
    # Compute loss
    loss = criterion(y_pred, y_train_tensor.view(-1, 1))
    
    # Backward pass
    loss.backward()
    optimizer.step()

    # Check if we should stop early
    if loss.item() < best_loss:
        best_loss = loss.item()
        patience_counter = 0
    else:
        patience_counter += 1
        
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1} with loss {loss.item()}")
        break
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")

# Evaluation
model.eval()
with torch.no_grad():
    # Get predictions
    y_pred = model(X_test_tensor)
    
    # Convert predictions to NumPy
    y_pred_numpy = y_pred.cpu().detach().numpy().flatten()
    
    # Calculate MSE and R²
    mse = mean_squared_error(y_test, y_pred_numpy)
    r2 = r2_score(y_test, y_pred_numpy)
    
    print(f"LSTM Model MSE: {mse}, R²: {r2}")


Epoch [10/50], Loss: 0.004320289473980665
Epoch [20/50], Loss: 0.0005033991765230894
Early stopping at epoch 24 with loss 0.0007177990046329796
LSTM Model MSE: 0.003222272139579327, R²: -10.824954890049256


Method 2: With Hyperparameters  hidden_dim=128

In [41]:

hidden_dim = 128  
num_layers = 3    
dropout = 0.2     
learning_rate = 0.0005  
epochs = 100  
patience = 10  


X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)


X_train_tensor = X_train_tensor.unsqueeze(1) 
X_test_tensor = X_test_tensor.unsqueeze(1)    

model = LSTMModel(input_dim=X_train.shape[1], hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop with early stopping
best_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    y_pred = model(X_train_tensor)
    
    # Compute loss
    loss = criterion(y_pred, y_train_tensor.view(-1, 1))
    
    # Backward pass
    loss.backward()
    optimizer.step()

    # Check if we should stop early
    if loss.item() < best_loss:
        best_loss = loss.item()
        patience_counter = 0
    else:
        patience_counter += 1
    
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1} with loss {loss.item()}")
        break
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")

# Evaluate the LSTM model on the test data
model.eval()
with torch.no_grad():
    y_pred_lstm = model(X_test_tensor)
    
    # Convert predictions to NumPy
    y_pred_numpy = y_pred_lstm.cpu().detach().numpy().flatten()
    
    # Calculate MSE, R², and MAE
    mse_lstm = mean_squared_error(y_test, y_pred_numpy)
    r2_lstm = r2_score(y_test, y_pred_numpy)
    mae_lstm = mean_absolute_error(y_test, y_pred_numpy)
    
    print(f"LSTM Model MSE: {mse_lstm}, R²: {r2_lstm}, MAE: {mae_lstm}")


Epoch [10/100], Loss: 9.962165495380759e-05
Epoch [20/100], Loss: 0.00021199113689363003
Early stopping at epoch 21 with loss 0.000183356532943435
LSTM Model MSE: 0.00030985773410140605, R²: -0.13710250697830606, MAE: 0.012230074620605533


In [40]:
# Hyperparameters for more complex model
hidden_dim = 256  # Increased hidden units
num_layers = 3    # Increased number of layers
dropout = 0.1     # Moderate dropout rate
learning_rate = 0.0003  # Reduced learning rate
epochs = 100      # Increase epochs for more thorough training
patience = 10     # Increased patience for early stopping

# Prepare the data (assuming X_train_tensor and y_train_tensor are already defined)
X_train_tensor = torch.tensor(X_train_lstm, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_lstm, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Initialize the LSTM model
model = LSTMModel(input_dim=X_train_lstm.shape[2], hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop with early stopping
best_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    y_pred = model(X_train_tensor)
    
    # Compute loss
    loss = criterion(y_pred, y_train_tensor.view(-1, 1))
    
    # Backward pass
    loss.backward()
    optimizer.step()

    # Early stopping condition
    if loss.item() < best_loss:
        best_loss = loss.item()
        patience_counter = 0
    else:
        patience_counter += 1
    
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1} with loss {loss.item()}")
        break
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")

# Evaluate the LSTM model on the test data
model.eval()
with torch.no_grad():
    y_pred_lstm = model(X_test_tensor)
    
    # Convert predictions to NumPy
    y_pred_numpy = y_pred_lstm.cpu().detach().numpy().flatten()
    
    # Calculate MSE, R², and MAE
    mse_lstm = mean_squared_error(y_test, y_pred_numpy)
    r2_lstm = r2_score(y_test, y_pred_numpy)
    mae_lstm = mean_absolute_error(y_test, y_pred_numpy)
    
    print(f"LSTM Model MSE: {mse_lstm}, R²: {r2_lstm}, MAE: {mae_lstm}")


Epoch [10/100], Loss: 0.00014450604794546962
Early stopping at epoch 15 with loss 8.879851520759985e-05
LSTM Model MSE: 0.0002683249700953755, R²: 0.015312633505293327, MAE: 0.009686178304511775


In [39]:
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error

# Define a grid of hyperparameters to tune
param_grid = {
    'hidden_dim': [64, 128],  # Number of hidden units
    'num_layers': [2, 3],     # Number of LSTM layers
    'dropout': [0.1, 0.2],    # Dropout rate
    'learning_rate': [0.0003, 0.0005],  # Learning rate
    'epochs': [50, 100],      # Number of epochs
    'batch_size': [32, 64]    # Batch size
}

# Initialize best metrics
best_mse = float('inf')
best_params = {}

# Grid search loop
for params in ParameterGrid(param_grid):
    print(f"Training with params: {params}")

    hidden_dim = params['hidden_dim']
    num_layers = params['num_layers']
    dropout = params['dropout']
    learning_rate = params['learning_rate']
    epochs = params['epochs']
    batch_size = params['batch_size']
    

    X_train_tensor = torch.tensor(X_train_lstm, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test_lstm, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
    
    model = LSTMModel(input_dim=X_train_lstm.shape[2], hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)
    
    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Train the model
    best_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        # Forward pass
        y_pred = model(X_train_tensor)

        # Compute loss
        loss = criterion(y_pred, y_train_tensor.view(-1, 1))

        # Backward pass
        loss.backward()
        optimizer.step()

        # Early stopping condition
        if loss.item() < best_loss:
            best_loss = loss.item()
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= 10:  # Stop if no improvement
            print(f"Early stopping at epoch {epoch+1} with loss {loss.item()}")
            break
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")

    # Evaluate the model on the test data
    model.eval()
    with torch.no_grad():
        y_pred_lstm = model(X_test_tensor)
        y_pred_numpy = y_pred_lstm.cpu().detach().numpy().flatten()

        # Calculate evaluation metrics
        mse_lstm = mean_squared_error(y_test, y_pred_numpy)
        r2_lstm = r2_score(y_test, y_pred_numpy)
        mae_lstm = mean_absolute_error(y_test, y_pred_numpy)
        
        print(f"Test MSE: {mse_lstm}, R²: {r2_lstm}, MAE: {mae_lstm}")

    # Track the best parameters
    if mse_lstm < best_mse:
        best_mse = mse_lstm
        best_params = params
        print(f"Best model so far: MSE = {best_mse}, Params = {best_params}")

# Output the best hyperparameters
print(f"Best Hyperparameters: {best_params}")


Training with params: {'batch_size': 32, 'dropout': 0.1, 'epochs': 50, 'hidden_dim': 64, 'learning_rate': 0.0003, 'num_layers': 2}
Epoch [10/50], Loss: 0.006269329693168402
Epoch [20/50], Loss: 0.0034028906375169754
Epoch [30/50], Loss: 0.001526078674942255
Epoch [40/50], Loss: 0.0005605411133728921
Epoch [50/50], Loss: 0.000251695018960163
Test MSE: 0.002490169407748459, R²: -8.138315958331951, MAE: 0.040778098587653216
Best model so far: MSE = 0.002490169407748459, Params = {'batch_size': 32, 'dropout': 0.1, 'epochs': 50, 'hidden_dim': 64, 'learning_rate': 0.0003, 'num_layers': 2}
Training with params: {'batch_size': 32, 'dropout': 0.1, 'epochs': 50, 'hidden_dim': 64, 'learning_rate': 0.0003, 'num_layers': 3}
Epoch [10/50], Loss: 9.723919356474653e-05
Epoch [20/50], Loss: 0.00011713154526660219
Early stopping at epoch 22 with loss 0.00011293414718238637
Test MSE: 0.0002794740106351899, R²: -0.02560163311771335, MAE: 0.010053753588356118
Best model so far: MSE = 0.0002794740106351899,