In [1]:
import os 
import yfinance as yf
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [2]:

def download_data(ticker, start='2010-01-01', end='2024-01-01'):
    data = yf.download(ticker, start=start, end=end)
    return data

def compute_momentum_features(data, periods=[1,2,3,4,5,10,15,20,60,120,180,240]):
    for p in periods:
        data[f'mom_{p}'] = data['Close'].pct_change(p)
    return data.dropna()


In [3]:

def prepare_data(data):
    #data['next_open_pct_change'] = data['Open'].pct_change().shift(-1)
    data['next_open_pct_change'] = (data['Open'].shift(-1) > data['Close']).astype(int)
    data = data.dropna()
    
    features = [col for col in data.columns if 'mom_' in col]
    X = data[features].values
    y = data['next_open_pct_change'].values.reshape(-1,1)
    
    scaler_X, scaler_y = StandardScaler(), StandardScaler()
    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
    y_train, y_test = torch.tensor(y_train, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)
    
    return X_train.unsqueeze(1), X_test.unsqueeze(1), y_train, y_test, scaler_y

In [None]:

class LSTMModel(nn.Module):
    def __init__(self, input_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, 128, num_layers=3, batch_first=True, dropout=0.2)
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.relu(self.fc1(x[:, -1, :]))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
### remove batch training, save best model and include a validation set 
def train_model(model, train_loader, criterion, optimizer, epochs=1000):
    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item():.6f}')

def evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            loss = torch.sqrt(criterion(y_pred, y_batch))
            total_loss += loss.item()
    print(f'Test RMSE: {total_loss / len(test_loader):.6f}')



def main():
    ticker = 'SPY'
    data = download_data(ticker)
    data = compute_momentum_features(data)
    X_train, X_test, y_train, y_test, scaler_y = prepare_data(data)
    
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)
    
    model = LSTMModel(input_size=X_train.shape[2])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    train_model(model, train_loader, criterion, optimizer)
    evaluate_model(model, test_loader, criterion)

if __name__ == '__main__':
    main()

In [5]:
ticker = 'SPY'
data = download_data(ticker)
data = compute_momentum_features(data)

data['next_open_pct_change'] = (data['Open'].shift(-1) / data['Close'] -1 ) 
data.head()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,mom_1,mom_2,mom_3,mom_4,mom_5,mom_10,mom_15,mom_20,mom_60,mom_120,mom_180,mom_240,next_open_pct_change
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2010-12-15,95.548523,96.187568,95.386838,95.810303,160823100,-0.004572,-0.003693,-0.003053,0.002747,0.006652,0.025535,0.0477,0.050271,0.088788,0.156618,0.071436,0.110687,0.000644
2010-12-16,96.102844,96.172141,95.279018,95.610089,185035200,0.005801,0.001203,0.002087,0.002731,0.008565,0.01844,0.038436,0.055828,0.100512,0.167007,0.070334,0.114182,-0.000701
2010-12-17,96.205765,96.329599,95.834252,96.035488,141075300,0.001071,0.006879,0.002275,0.003161,0.003805,0.016793,0.051798,0.041628,0.110699,0.205476,0.062818,0.114591,0.002736
2010-12-20,96.437981,96.670177,95.958116,96.468941,119085500,0.002414,0.003487,0.009309,0.004694,0.005582,0.020326,0.051152,0.041278,0.090884,0.219975,0.062878,0.112584,0.00313
2010-12-21,97.049408,97.111328,96.646941,96.739815,94965500,0.00634,0.008769,0.009849,0.015708,0.011064,0.02621,0.063798,0.048751,0.103084,0.233206,0.075761,0.115925,0.000718


In [6]:
data.columns = [i[0] for i in data.columns]
data.columns

features = [col for col in data.columns if 'mom_' in col]
features.append('next_open_pct_change')
features

['mom_1',
 'mom_2',
 'mom_3',
 'mom_4',
 'mom_5',
 'mom_10',
 'mom_15',
 'mom_20',
 'mom_60',
 'mom_120',
 'mom_180',
 'mom_240',
 'next_open_pct_change']

data = data.dropna()
X = data[features].values
y = data['next_open_pct_change'].values.reshape(-1,1)

scaler_X, scaler_y = StandardScaler(), StandardScaler()
X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)


In [7]:
X_train, X_test, y_train, y_test, scaler_y = prepare_data(data)

In [8]:
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

In [9]:

model = LSTMModel(input_size=X_train.shape[2])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
train_model(model, train_loader, criterion, optimizer)

Epoch 0, Loss: 1.237116
Epoch 10, Loss: 0.735760
Epoch 20, Loss: 1.806503
Epoch 30, Loss: 0.999390
Epoch 40, Loss: 3.472342
Epoch 50, Loss: 2.742571
Epoch 60, Loss: 0.604045
Epoch 70, Loss: 0.456655
Epoch 80, Loss: 1.580826
Epoch 90, Loss: 0.399314
Epoch 100, Loss: 1.138080
Epoch 110, Loss: 1.139442
Epoch 120, Loss: 1.318484
Epoch 130, Loss: 1.265870
Epoch 140, Loss: 0.833204
Epoch 150, Loss: 0.557449
Epoch 160, Loss: 0.130003
Epoch 170, Loss: 0.012848
Epoch 180, Loss: 0.420345
Epoch 190, Loss: 1.049511
Epoch 200, Loss: 0.776601
Epoch 210, Loss: 0.397472
Epoch 220, Loss: 0.002952
Epoch 230, Loss: 0.012708
Epoch 240, Loss: 0.000254
Epoch 250, Loss: 0.056747
Epoch 260, Loss: 1.519042
Epoch 270, Loss: 0.977417
Epoch 280, Loss: 0.074219
Epoch 290, Loss: 2.951344
Epoch 300, Loss: 0.051394
Epoch 310, Loss: 0.104906
Epoch 320, Loss: 0.154448
Epoch 330, Loss: 0.057318
Epoch 340, Loss: 0.001344
Epoch 350, Loss: 0.095298
Epoch 360, Loss: 0.030267
Epoch 370, Loss: 0.013417
Epoch 380, Loss: 0.0034

In [11]:
evaluate_model(model, test_loader, criterion)

Test RMSE: 1.363733


data.columns = [i[0] for i in data.columns]
data.columns

data['next_open_pct_change'] = (data['Open'].shift(-1) / data['Close'] -1 ) 
data.head()

data = data[['Volume', 'mom_1', 'mom_2', 'mom_3',
       'mom_4', 'mom_5', 'mom_10', 'mom_15', 'mom_20', 'mom_60', 'mom_120',
       'mom_180', 'mom_240', 'next_open_pct_change']]

data.head()

data.next_open_pct_change.hist()