In [21]:
import torch
import torch.nn as nn
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from torchinfo import summary
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import torch.optim as optim



In [22]:

data = pd.read_csv('composite_stocks.csv', index_col='date', parse_dates=True)

#uncomment this to only train one index dataset
#data = data[['GSPC']] 


scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)


def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data) - seq_length):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length]
        xs.append(x)
        ys.append([y])  
    return np.array(xs), np.array(ys).reshape(-1, 1)

seq_length = 30  # number of days to look back
X, y = create_sequences(data_scaled, seq_length)


train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


In [23]:

class StockDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]


In [24]:
# data loaders
batch_size = 16
train_dataset = StockDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
test_dataset = StockDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [25]:

for i, (features, targets) in enumerate(train_loader):
    print(f"Batch {i+1}")
    print("Features shape:", features.shape) 
    print("Features dtype:", features.dtype)  
    print("Targets shape:", targets.shape)  
    print("Targets dtype:", targets.dtype)   
    if i == 2:  # check the first 3 batches
        break


Batch 1
Features shape: torch.Size([16, 30, 3])
Features dtype: torch.float32
Targets shape: torch.Size([16, 1])
Targets dtype: torch.float32
Batch 2
Features shape: torch.Size([16, 30, 3])
Features dtype: torch.float32
Targets shape: torch.Size([16, 1])
Targets dtype: torch.float32
Batch 3
Features shape: torch.Size([16, 30, 3])
Features dtype: torch.float32
Targets shape: torch.Size([16, 1])
Targets dtype: torch.float32


In [26]:
from lstm import LSTMModel

input_dim = 10  # Assuming each input feature vector has 10 features
hidden_dim = 50  # Number of LSTM units
layer_dim = 2  # Number of stacked LSTM layers

device = "cpu"
model = LSTMModel(input_dim, hidden_dim, layer_dim).to(device)


optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()  # Using Mean Squared Error Loss for a regression task
epochs = 10

model.train()
for epoch in range(epochs):
    total_loss = 0
    for features, targets in train_loader:
        features, targets = features.to(device).float(), targets.to(device).float()
        optimizer.zero_grad()
        output = model(features)
        output = output.squeeze()
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        if epoch == 0:
            print("Output shape:", output.shape)
            print("Targets shape:", targets.shape)

    print(f'Epoch {epoch+1}, Training Loss: {total_loss / len(train_loader)}')


Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])


  return F.mse_loss(input, target, reduction=self.reduction)


Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])
Targets shape: torch.Size([16, 1])
Output shape: torch.Size([16, 3])


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2, Training Loss: 0.008508978295139968
Epoch 3, Training Loss: 0.00763050022093618
Epoch 4, Training Loss: 0.006629112296246199
Epoch 5, Training Loss: 0.005483313243732684
Epoch 6, Training Loss: 0.005358929059891832
Epoch 7, Training Loss: 0.005059856270438306
Epoch 8, Training Loss: 0.005058200433672432
Epoch 9, Training Loss: 0.004912676096056303
Epoch 10, Training Loss: 0.004790227906293391


In [27]:
import torch
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate_model(model, data_loader, device):
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device).float()
            targets = targets.to(device).float()
            outputs = model(features)
            outputs = outputs[:, -1].unsqueeze(1)  # Adjusting the output to match target shape

            predictions.extend(outputs.detach().cpu().numpy())
            actuals.extend(targets.detach().cpu().numpy())

    predictions = np.array(predictions)
    actuals = np.array(actuals)

    mse = mean_squared_error(actuals, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actuals, predictions)
    mape = np.mean(np.abs((actuals - predictions) / (actuals + np.finfo(float).eps))) * 100

    print(f'Evaluation Results - RMSE: {rmse:.3f}, MAE: {mae:.3f}, MAPE: {mape:.2f}%')

# Then run the evaluation again
evaluate_model(model, test_loader, device)


Evaluation Results - RMSE: 0.393, MAE: 0.337, MAPE: 55.88%


In [31]:
def predict_stock_indices(model, data_loader, scaler):
    model.eval()  # Set the model to evaluation mode to turn off dropout, batchnorm, etc.
    predictions = []
    actuals = []
    
    with torch.no_grad():  # No need to track gradients for prediction
        for features, targets in data_loader:
            features = features.float()  # Ensure data is in float
            outputs = model(features)
            
            # Assume outputs are already correctly sized (batch_size, num_indices)
            outputs_inv = scaler.inverse_transform(outputs.detach().cpu().numpy())
            targets_inv = scaler.inverse_transform(targets.detach().cpu().numpy())
            
            predictions.append(outputs_inv)
            actuals.append(targets_inv)
    
    predictions = np.concatenate(predictions, axis=0)
    actuals = np.concatenate(actuals, axis=0)
    
    return predictions, actuals

predictions, actuals = predict_stock_indices(model, test_loader, scaler)



ValueError: non-broadcastable output operand with shape (16,1) doesn't match the broadcast shape (16,3)