In [1]:
import random
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F
import pandas as pd
import getData
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))


Using cuda device


In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv_stack = nn.Sequential(
            nn.Conv1d(1, 512, kernel_size=3, stride=1),
            nn.Conv1d(512,512,kernel_size=7, stride=1),
            nn.Conv1d(512,1024,kernel_size=9, stride = 1),
            nn.Conv1d(1024,512,kernel_size=5, stride=1),
            nn.Flatten(),
            nn.ReLU()
        )
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(40960, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        convs = self.conv_stack(x)
        #print(convs.shape)
        logits = self.linear_relu_stack(convs)
        return logits


In [4]:
model = NeuralNetwork().cuda()
print(model)

NeuralNetwork(
  (conv_stack): Sequential(
    (0): Conv1d(1, 512, kernel_size=(3,), stride=(1,))
    (1): Conv1d(512, 512, kernel_size=(7,), stride=(1,))
    (2): Conv1d(512, 1024, kernel_size=(9,), stride=(1,))
    (3): Conv1d(1024, 512, kernel_size=(5,), stride=(1,))
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): ReLU()
  )
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=40960, out_features=4096, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4096, out_features=4096, bias=True)
    (3): ReLU()
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4096, out_features=1024, bias=True)
    (7): ReLU()
    (8): Linear(in_features=1024, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=1, bias=True)
    (11): Sigmoid()
  )
)


In [5]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

215238657

In [7]:
class Data(Dataset):
    def __init__(self,length):
        self.tickers = getData.getAllTickers()
        self.length = length
    def __len__(self):
        return self.length

    def RandomTickerItem(self):
        tickers = self.tickers
        size = 0
        while(size <= 100):
            ticker = random.choice(tickers)
            path = f'../TradingAI/Data/{ticker}.csv'
            while(not os.path.isfile(path)):
                ticker = random.choice(tickers)
                path = f'../TradingAI/Data/{ticker}.csv'
            stock = pd.read_csv(path)
            size = len(stock["Open"])
        length = random.randint(0,len(stock["Open"])-100)
        return torch.tensor(stock["Open"][length:length+100].to_list()).cuda()
    
    def RandomWalkItem(self, length):
        walk = []
        walk.append(((91.870000 - 7.780000) * torch.rand(1) + 7.780000).tolist()[0])
        #(r2 - r1) * torch.rand(a, b) + r1
        walks = []
        chnges = torch.normal(0.22,4.82,(length-1,1))
        for i in range(length-1):
            chng = float(chnges[i])
            walk.append(chng*abs(walk[-1])/100 + walk[-1])
        return torch.tensor(walk).cuda()
    
    def __getitem__(self,idx):
        if idx % 2 == 0:
            data = self.RandomTickerItem()
            target = 1
        else:
            data = self.RandomWalkItem(100)
            target = 0
        return torch.reshape(data,(1,100)).cuda(), torch.tensor(target).cuda()
            
        
        

In [8]:
training_data = Data(int(5e5))

FileNotFoundError: [Errno 2] No such file or directory: '../Data/Ticks.txt'

In [9]:
testing_data = Data(int(1e5))

FileNotFoundError: [Errno 2] No such file or directory: '../Data/Ticks.txt'

In [None]:
learning_rate = 1e-4
batch_size = 250
epochs = 3

In [None]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=batch_size, shuffle=True)

In [None]:
loss_fn = nn.BCELoss()

In [13]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [23]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        #print(pred)
        loss = loss_fn(pred, y.unsqueeze(-1).float())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [24]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y.unsqueeze(-1).float()).item()
            correct += (pred.round().squeeze() == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [16]:
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
#torch.save(model.state_dict(), 'model_weightsV3.pth')

Epoch 1
-------------------------------


IndexError: Cannot choose from an empty sequence

Analyze the model's predictions

# Analyzing results

In [10]:
model.load_state_dict(torch.load('model_weightsV3.pth'))

<All keys matched successfully>

In [11]:
test_features, test_labels = next(iter(test_dataloader))

In [12]:
model.eval()

NeuralNetwork(
  (conv_stack): Sequential(
    (0): Conv1d(1, 512, kernel_size=(3,), stride=(1,))
    (1): Conv1d(512, 512, kernel_size=(7,), stride=(1,))
    (2): Conv1d(512, 1024, kernel_size=(9,), stride=(1,))
    (3): Conv1d(1024, 512, kernel_size=(5,), stride=(1,))
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): ReLU()
  )
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=40960, out_features=4096, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4096, out_features=4096, bias=True)
    (3): ReLU()
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4096, out_features=1024, bias=True)
    (7): ReLU()
    (8): Linear(in_features=1024, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=1, bias=True)
    (11): Sigmoid()
  )
)

In [13]:
pred = model(test_features)

In [27]:
test_loop(test_dataloader, model, loss_fn)

Test Error: 
 Accuracy: 87.5%, Avg loss: 0.292042 



This may seem as evidence to the fact that the stock market is not in fact a random walk. However it is possible that the NN was able to exploit any part of the methodology. For example the random walks formed may distinctly be different that the stock market that the NN was able to detect.

### Further investigation
- Longer sequences