## Setup

In [34]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import defaultdict
import torch.optim as optim
import torch.nn.functional as F
from collections import Counter
import random
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [35]:
# Helper function for creating input to the model given full dataframe
def create_dataset(stock_data_full, lookback=30):
  # Initialize empty lists for storing sequences and labels
  sequences = []
  labels = []

  # Loop over the data and create 30-day sequences with corresponding labels
  for i in range(len(stock_data_full) - lookback):
      # Get the 30-day sequence for this row, including all features
      sequence = stock_data_full.iloc[i:i+lookback].values
      # Append the sequence to the list
      sequences.append(np.float32(sequence))
      # Get the label for this sequence, which is the stock price on the next trading day
      label = stock_data_full.iloc[i+lookback]['Label']
      # Append the label to the list
      labels.append(label)

  # Convert the sequences and labels to numpy arrays
  X = np.array(sequences)
  y = np.array(labels)

  # Print the shapes of X and y to verify they are the correct dimensions
  print(X.shape)  # should be (num_sequences, window_size, num_features)
  print(y.shape)  # should be (num_sequences,)

  print(X.dtype)
  print(y.dtype)
  return torch.from_numpy(X), torch.tensor(y)

In [36]:
# Definition of LSTM model to be used for our traning 
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTM(input_size, hidden_size[0], batch_first=True, dropout=0.2)
        self.lstm2 = nn.LSTM(hidden_size[0], hidden_size[1], batch_first=True, dropout=0.2)
        self.lstm3 = nn.LSTM(hidden_size[1], hidden_size[2], batch_first=True, dropout=0.2)
        self.linear = nn.Linear(hidden_size[2], output_size)
        
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size[0]).to(device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size[0]).to(device)
        
        out, _ = self.lstm1(x, (h0, c0))
        out, _ = self.lstm2(out)
        out, _ = self.lstm3(out)
        out = self.linear(out[:, -1, :])
        
        return out

## Model Traning on 1 stock to check correctness

In [37]:
# load data
df = pd.read_csv('/content/drive/MyDrive/StocksDatasets/WithIndicators/BOM500010Stock.csv')
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)

In [38]:
# train-test split for time series
train_size = int(len(df) * 0.80)
test_size = len(df) - train_size
train, test = df[:train_size], df[train_size:]

In [39]:
lookback = 30
batch_size =32
X_train, y_train = create_dataset(train, lookback=lookback)
X_test, y_test = create_dataset(test, lookback=lookback)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
# Create training and testing datasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Create data loaders for batching the datasets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

(2566, 30, 26)
(2566,)
float32
float64
(619, 30, 26)
(619,)
float32
float64
torch.Size([2566, 30, 26]) torch.Size([2566])
torch.Size([619, 30, 26]) torch.Size([619])


In [40]:
from torch.types import Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Define the model hyperparameters
input_size = 26
hidden_size = [32, 16, 10]
output_size = 1
dropout_prob = 0.2

# Initialize the model
model = LSTMModel(input_size, hidden_size, output_size).to(device)

#summary(model, input_size=(30, 26))




In [41]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# setting up hyperparameters
learning_rate = 0.001
#batch_size = 64

# Initialize the model and the optimizer
#model = StockSelectionModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

# Define the loss function
criterion = nn.MSELoss()


In [42]:
num_epochs=20
# Train the LSTM model
for epoch in range(num_epochs):
    for i, (sequences, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(sequences.float()) # cast input tensor to Float
        loss = criterion(outputs, labels.float().unsqueeze(1)) # cast label tensor to Float
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

    # Print the loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/20], Loss: 0.7354
Epoch [20/20], Loss: 0.3096


In [43]:
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        outputs = model(sequences)
        predicted = torch.round(outputs).flatten().int()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on test set: {100 * correct / total:.2f}%")


Accuracy on test set: 20.03%


## Traning pipeline for all 31 stocks

In [44]:
traningAccuracies = dict()

In [45]:
def trainAndTestModelOnStock(stock):
  fileName = stock + 'Stock.csv'
  df = pd.read_csv('/content/drive/MyDrive/StocksDatasets/WithIndicators/' + fileName)
  df.set_index('Date', inplace=True)
  df.sort_index(inplace=True)
  
  # train-test split for time series
  train_size = int(len(df) * 0.80)
  test_size = len(df) - train_size
  train, test = df[:train_size], df[train_size:]

  lookback = 30
  batch_size =32
  X_train, y_train = create_dataset(train, lookback=lookback)
  X_test, y_test = create_dataset(test, lookback=lookback)
  print(X_train.shape, y_train.shape)
  print(X_test.shape, y_test.shape)
  
  # Create training and testing datasets
  train_dataset = TensorDataset(X_train, y_train)
  test_dataset = TensorDataset(X_test, y_test)

  # Create data loaders for batching the datasets
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  from torch.types import Device
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # Define the model hyperparameters
  input_size = 26
  hidden_size = [32, 16, 10]
  output_size = 1
  dropout_prob = 0.2
  learning_rate = 0.001

  # Initialize the model
  model = LSTMModel(input_size, hidden_size, output_size).to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))
  criterion = nn.MSELoss()

  # Train the LSTM model
  num_epochs=20
  for epoch in range(num_epochs):
      for i, (sequences, labels) in enumerate(train_loader):
          optimizer.zero_grad()
          outputs = model(sequences.float()) # cast input tensor to Float
          loss = criterion(outputs, labels.float().unsqueeze(1)) # cast label tensor to Float
          loss.backward()
          nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
          optimizer.step()

      # Print the loss every 10 epochs
      if (epoch + 1) % 10 == 0:
          print(f"{stock} Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
  
  # Test the Model and save accuracy
  correct = 0
  total = 0
  with torch.no_grad():
      for sequences, labels in test_loader:
          outputs = model(sequences)
          predicted = torch.round(outputs).flatten().int()
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  print(f"{stock} Accuracy on test set: {100 * correct / total:.2f}%")
  traningAccuracies[stock] = correct/total









In [46]:
STOCKS = ['BOM500875','BOM532939','BOM524715','BOM532215','BOM532648','BOM532500','BOM500470','BOM500570','BOM500112','BOM532540','BOM500209','BOM500295','BOM532174','BOM500180','BOM500696','BOM500510','BOM500247','BOM500520','BOM532921','BOM532977','BOM500182','BOM507685','BOM500010','BOM532454','BOM500820','BOM500312','BOM532898','BOM532187','BOM533278','BOM532555','BOM570001']

for stock in STOCKS:
  trainAndTestModelOnStock(stock)

(2922, 30, 26)
(2922,)
float32
float64
(708, 30, 26)
(708,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([708, 30, 26]) torch.Size([708])




BOM500875 Epoch [10/20], Loss: 0.5820
BOM500875 Epoch [20/20], Loss: 0.2924
BOM500875 Accuracy on test set: 57.77%
(2134, 30, 26)
(2134,)
float32
float64
(512, 30, 26)
(512,)
float32
float64
torch.Size([2134, 30, 26]) torch.Size([2134])
torch.Size([512, 30, 26]) torch.Size([512])




BOM532939 Epoch [10/20], Loss: 0.5374
BOM532939 Epoch [20/20], Loss: 0.5786
BOM532939 Accuracy on test set: 62.70%
(2922, 30, 26)
(2922,)
float32
float64
(708, 30, 26)
(708,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([708, 30, 26]) torch.Size([708])




BOM524715 Epoch [10/20], Loss: 0.2836
BOM524715 Epoch [20/20], Loss: 0.4811
BOM524715 Accuracy on test set: 38.84%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM532215 Epoch [10/20], Loss: 0.3088
BOM532215 Epoch [20/20], Loss: 0.3173
BOM532215 Accuracy on test set: 72.07%
(2650, 30, 26)
(2650,)
float32
float64
(641, 30, 26)
(641,)
float32
float64
torch.Size([2650, 30, 26]) torch.Size([2650])
torch.Size([641, 30, 26]) torch.Size([641])




BOM532648 Epoch [10/20], Loss: 0.4333
BOM532648 Epoch [20/20], Loss: 0.5846
BOM532648 Accuracy on test set: 41.65%
(2925, 30, 26)
(2925,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2925, 30, 26]) torch.Size([2925])
torch.Size([709, 30, 26]) torch.Size([709])




BOM532500 Epoch [10/20], Loss: 0.1058
BOM532500 Epoch [20/20], Loss: 0.6029
BOM532500 Accuracy on test set: 43.72%
(2922, 30, 26)
(2922,)
float32
float64
(708, 30, 26)
(708,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([708, 30, 26]) torch.Size([708])




BOM500470 Epoch [10/20], Loss: 0.2386
BOM500470 Epoch [20/20], Loss: 0.0525
BOM500470 Accuracy on test set: 43.79%
(2922, 30, 26)
(2922,)
float32
float64
(708, 30, 26)
(708,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([708, 30, 26]) torch.Size([708])




BOM500570 Epoch [10/20], Loss: 0.4643
BOM500570 Epoch [20/20], Loss: 0.3219
BOM500570 Accuracy on test set: 59.04%
(2922, 30, 26)
(2922,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500112 Epoch [10/20], Loss: 1.0245
BOM500112 Epoch [20/20], Loss: 0.0260
BOM500112 Accuracy on test set: 52.19%
(2828, 30, 26)
(2828,)
float32
float64
(685, 30, 26)
(685,)
float32
float64
torch.Size([2828, 30, 26]) torch.Size([2828])
torch.Size([685, 30, 26]) torch.Size([685])




BOM532540 Epoch [10/20], Loss: 0.5186
BOM532540 Epoch [20/20], Loss: 0.2744
BOM532540 Accuracy on test set: 65.69%
(2925, 30, 26)
(2925,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2925, 30, 26]) torch.Size([2925])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500209 Epoch [10/20], Loss: 0.2219
BOM500209 Epoch [20/20], Loss: 0.3182
BOM500209 Accuracy on test set: 57.26%
(2922, 30, 26)
(2922,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500295 Epoch [10/20], Loss: 0.5143
BOM500295 Epoch [20/20], Loss: 0.0642
BOM500295 Accuracy on test set: 63.05%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM532174 Epoch [10/20], Loss: 0.0991
BOM532174 Epoch [20/20], Loss: 0.8957
BOM532174 Accuracy on test set: 57.40%
(2925, 30, 26)
(2925,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2925, 30, 26]) torch.Size([2925])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500180 Epoch [10/20], Loss: 0.0751
BOM500180 Epoch [20/20], Loss: 0.7834
BOM500180 Accuracy on test set: 39.92%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500696 Epoch [10/20], Loss: 0.7803
BOM500696 Epoch [20/20], Loss: 0.6028
BOM500696 Accuracy on test set: 51.06%
(2906, 30, 26)
(2906,)
float32
float64
(705, 30, 26)
(705,)
float32
float64
torch.Size([2906, 30, 26]) torch.Size([2906])
torch.Size([705, 30, 26]) torch.Size([705])




BOM500510 Epoch [10/20], Loss: 0.4299
BOM500510 Epoch [20/20], Loss: 0.2651
BOM500510 Accuracy on test set: 45.82%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500247 Epoch [10/20], Loss: 0.2333
BOM500247 Epoch [20/20], Loss: 0.6643
BOM500247 Accuracy on test set: 59.24%
(2926, 30, 26)
(2926,)
float32
float64
(710, 30, 26)
(710,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([710, 30, 26]) torch.Size([710])




BOM500520 Epoch [10/20], Loss: 0.1082
BOM500520 Epoch [20/20], Loss: 0.3744
BOM500520 Accuracy on test set: 61.27%
(2175, 30, 26)
(2175,)
float32
float64
(522, 30, 26)
(522,)
float32
float64
torch.Size([2175, 30, 26]) torch.Size([2175])
torch.Size([522, 30, 26]) torch.Size([522])




BOM532921 Epoch [10/20], Loss: 0.2476
BOM532921 Epoch [20/20], Loss: 0.3293
BOM532921 Accuracy on test set: 44.06%
(2079, 30, 26)
(2079,)
float32
float64
(498, 30, 26)
(498,)
float32
float64
torch.Size([2079, 30, 26]) torch.Size([2079])
torch.Size([498, 30, 26]) torch.Size([498])




BOM532977 Epoch [10/20], Loss: 0.5393
BOM532977 Epoch [20/20], Loss: 0.5400
BOM532977 Accuracy on test set: 65.26%
(2925, 30, 26)
(2925,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2925, 30, 26]) torch.Size([2925])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500182 Epoch [10/20], Loss: 0.5131
BOM500182 Epoch [20/20], Loss: 0.2605
BOM500182 Accuracy on test set: 44.57%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM507685 Epoch [10/20], Loss: 0.0569
BOM507685 Epoch [20/20], Loss: 0.2944
BOM507685 Accuracy on test set: 46.97%
(2566, 30, 26)
(2566,)
float32
float64
(619, 30, 26)
(619,)
float32
float64
torch.Size([2566, 30, 26]) torch.Size([2566])
torch.Size([619, 30, 26]) torch.Size([619])




BOM500010 Epoch [10/20], Loss: 0.9292
BOM500010 Epoch [20/20], Loss: 0.5182
BOM500010 Accuracy on test set: 33.76%
(2922, 30, 26)
(2922,)
float32
float64
(708, 30, 26)
(708,)
float32
float64
torch.Size([2922, 30, 26]) torch.Size([2922])
torch.Size([708, 30, 26]) torch.Size([708])




BOM532454 Epoch [10/20], Loss: 0.2743
BOM532454 Epoch [20/20], Loss: 0.3135
BOM532454 Accuracy on test set: 37.85%
(2926, 30, 26)
(2926,)
float32
float64
(709, 30, 26)
(709,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([709, 30, 26]) torch.Size([709])




BOM500820 Epoch [10/20], Loss: 0.3127
BOM500820 Epoch [20/20], Loss: 0.3445
BOM500820 Accuracy on test set: 64.03%
(2926, 30, 26)
(2926,)
float32
float64
(710, 30, 26)
(710,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([710, 30, 26]) torch.Size([710])




BOM500312 Epoch [10/20], Loss: 0.0766
BOM500312 Epoch [20/20], Loss: 0.0916
BOM500312 Accuracy on test set: 37.61%
(2205, 30, 26)
(2205,)
float32
float64
(529, 30, 26)
(529,)
float32
float64
torch.Size([2205, 30, 26]) torch.Size([2205])
torch.Size([529, 30, 26]) torch.Size([529])




BOM532898 Epoch [10/20], Loss: 0.5495
BOM532898 Epoch [20/20], Loss: 0.2506
BOM532898 Accuracy on test set: 26.84%
(2926, 30, 26)
(2926,)
float32
float64
(710, 30, 26)
(710,)
float32
float64
torch.Size([2926, 30, 26]) torch.Size([2926])
torch.Size([710, 30, 26]) torch.Size([710])




BOM532187 Epoch [10/20], Loss: 0.5860
BOM532187 Epoch [20/20], Loss: 0.0721
BOM532187 Accuracy on test set: 11.69%
(1595, 30, 26)
(1595,)
float32
float64
(377, 30, 26)
(377,)
float32
float64
torch.Size([1595, 30, 26]) torch.Size([1595])
torch.Size([377, 30, 26]) torch.Size([377])




BOM533278 Epoch [10/20], Loss: 0.4500
BOM533278 Epoch [20/20], Loss: 0.3792
BOM533278 Accuracy on test set: 39.79%
(2789, 30, 26)
(2789,)
float32
float64
(675, 30, 26)
(675,)
float32
float64
torch.Size([2789, 30, 26]) torch.Size([2789])
torch.Size([675, 30, 26]) torch.Size([675])




BOM532555 Epoch [10/20], Loss: 0.7574
BOM532555 Epoch [20/20], Loss: 0.4079
BOM532555 Accuracy on test set: 54.22%
(1910, 30, 26)
(1910,)
float32
float64
(456, 30, 26)
(456,)
float32
float64
torch.Size([1910, 30, 26]) torch.Size([1910])
torch.Size([456, 30, 26]) torch.Size([456])




BOM570001 Epoch [10/20], Loss: 0.1893
BOM570001 Epoch [20/20], Loss: 0.6618
BOM570001 Accuracy on test set: 63.82%


In [57]:
print(traningAccuracies)

{'BOM500875': 0.577683615819209, 'BOM532939': 0.626953125, 'BOM524715': 0.3884180790960452, 'BOM532215': 0.7207334273624824, 'BOM532648': 0.4165366614664587, 'BOM532500': 0.43723554301833567, 'BOM500470': 0.4378531073446328, 'BOM500570': 0.5903954802259888, 'BOM500112': 0.5218617771509168, 'BOM532540': 0.656934306569343, 'BOM500209': 0.5726375176304654, 'BOM500295': 0.6304654442877292, 'BOM532174': 0.5740479548660085, 'BOM500180': 0.3991537376586742, 'BOM500696': 0.5105782792665726, 'BOM500510': 0.4581560283687943, 'BOM500247': 0.5923836389280677, 'BOM500520': 0.6126760563380281, 'BOM532921': 0.44061302681992337, 'BOM532977': 0.6526104417670683, 'BOM500182': 0.44569816643159377, 'BOM507685': 0.4696755994358251, 'BOM500010': 0.3376413570274637, 'BOM532454': 0.3785310734463277, 'BOM500820': 0.6403385049365303, 'BOM500312': 0.376056338028169, 'BOM532898': 0.2684310018903592, 'BOM532187': 0.11690140845070422, 'BOM533278': 0.3978779840848806, 'BOM532555': 0.5422222222222223, 'BOM570001': 0.

In [56]:
print('Classification Accuracies for each of 31 stock on Testing set is as follows: ')

print ("{:<10} {:<5}".format('Stock','Accuracy'))
for k, v in traningAccuracies.items():
        accuracy = v
        print ("{:<8} {:<15}".format(k, accuracy))

Classification Accuracies for each of 31 stock on Testing set is as follows: 
Stock      Accuracy
BOM500875 0.577683615819209
BOM532939 0.626953125    
BOM524715 0.3884180790960452
BOM532215 0.7207334273624824
BOM532648 0.4165366614664587
BOM532500 0.43723554301833567
BOM500470 0.4378531073446328
BOM500570 0.5903954802259888
BOM500112 0.5218617771509168
BOM532540 0.656934306569343
BOM500209 0.5726375176304654
BOM500295 0.6304654442877292
BOM532174 0.5740479548660085
BOM500180 0.3991537376586742
BOM500696 0.5105782792665726
BOM500510 0.4581560283687943
BOM500247 0.5923836389280677
BOM500520 0.6126760563380281
BOM532921 0.44061302681992337
BOM532977 0.6526104417670683
BOM500182 0.44569816643159377
BOM507685 0.4696755994358251
BOM500010 0.3376413570274637
BOM532454 0.3785310734463277
BOM500820 0.6403385049365303
BOM500312 0.376056338028169
BOM532898 0.2684310018903592
BOM532187 0.11690140845070422
BOM533278 0.3978779840848806
BOM532555 0.5422222222222223
BOM570001 0.6381578947368421


## Portfolio Optimization