In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
from finta import TA

In [9]:
class TimeSeriesDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device) # Hidden state
        c_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device) # Cell state

        # Forward propagate LSTM
        _, (hn, _) = self.lstm(x, (h_0, c_0))  # Get final hidden state

        # Decode the hidden state of the last time step
        out = self.fc(hn[-1])
        return out

In [10]:
input_size = 12  # Number of feature columns (change this to your `n`)
hidden_size = 64
num_layers = 1
output_size = 1  # Predicting a single value (e.g., price)

model = LSTMModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
def train_model(model, train_loader, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for sequences, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [12]:
# Prediction with confidence intervals calculation (simplified)
def predict_with_confidence(model, data_loader):
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for sequences, labels in data_loader:
            outputs = model(sequences)
            predictions.extend(outputs.numpy())
            actuals.extend(labels.numpy())

    predictions = np.array(predictions).flatten()
    actuals = np.array(actuals).flatten()
    mse = np.mean((predictions - actuals) ** 2)
    std_error = np.sqrt(mse)
    confidence_interval = 1.96 * std_error  # 95% confidence interval

    return predictions, actuals, confidence_interval

In [13]:
#load csv data
df = pd.read_csv("../csv/NVDA_from_2021.csv")

df["sma15"] = TA.SMA(df,15)
df["sma200"] = TA.SMA(df,200)
df["rsi"] = TA.RSI(df,14,'close')
df['macd'] = TA.MACD(df)["MACD"]
df['macd_signal'] = TA.MACD(df)["SIGNAL"]
df.tail(10)


Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap,sma15,sma200,rsi,macd,macd_signal
1314,NVDA,2025-03-26 04:00:00+00:00,118.73,118.84,112.71,113.76,296431667.0,2420079.0,114.711652,115.770667,133.182975,42.792976,-2.415055,-3.16963
1315,NVDA,2025-03-27 04:00:00+00:00,111.35,114.45,110.66,111.43,236902055.0,1790812.0,112.34066,115.828,127.695725,40.749753,-2.747865,-3.085277
1316,NVDA,2025-03-28 04:00:00+00:00,111.485,112.87,109.0701,109.67,229872549.0,1847538.0,110.119953,115.626667,127.635125,39.226188,-3.117698,-3.091761
1317,NVDA,2025-03-31 04:00:00+00:00,105.13,110.955,103.65,108.38,299212213.0,2328689.0,106.086596,115.72,127.572475,38.101733,-3.474829,-3.168375
1318,NVDA,2025-04-01 04:00:00+00:00,108.515,110.2,106.47,110.15,222614034.0,1780615.0,108.599925,115.812667,127.497225,40.617069,-3.573837,-3.249467
1319,NVDA,2025-04-02 04:00:00+00:00,107.29,111.98,106.79,110.42,220601243.0,1721288.0,109.775329,115.458,127.401275,41.010859,-3.589142,-3.317402
1320,NVDA,2025-04-03 04:00:00+00:00,103.51,105.63,101.6,101.8,338768918.0,3041134.0,103.32552,114.539333,127.250875,33.396515,-4.247866,-3.503495
1321,NVDA,2025-04-04 04:00:00+00:00,98.91,100.13,92.11,94.31,532271555.0,4606981.0,95.178482,112.715333,127.067525,28.453145,-5.313045,-3.865405
1322,NVDA,2025-04-07 04:00:00+00:00,87.46,101.75,86.62,97.64,611041347.0,5164576.0,95.248274,111.256,126.877825,33.188172,-5.821398,-4.256604
1323,NVDA,2025-04-08 04:00:00+00:00,103.805,105.85,94.46,96.3,470491731.0,3893105.0,100.953223,109.980667,126.705425,32.262876,-6.260234,-4.65733


In [14]:
split_ratio = 0.8
split_index = int(len(sequences))

train_sequences =   # Numpy array: shape (num_samples, sequence_length, input_size)
train_labels = ...     # Numpy array: shape (num_samples, output_size)
test_sequences = ...   # Make sure these arrays are prepared
test_labels = ...

batch_size = 32
train_dataset = TimeSeriesDataset(torch.from_numpy(train_sequences).float(), torch.from_numpy(train_labels).float())
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TimeSeriesDataset(torch.from_numpy(test_sequences).float(), torch.from_numpy(test_labels).float())
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Train the model
train_model(model, train_loader, num_epochs=100)

# Make predictions and calculate confidence
predictions, actuals, confidence_interval = predict_with_confidence(model, test_loader)

print(f'Predictions: {predictions}')
print(f'Actuals: {actuals}')
print(f'Confidence Interval: +/- {confidence_interval}')

TypeError: expected np.ndarray (got ellipsis)