In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
files = ['./data/' + f for f in listdir('./data/') if isfile(join('./data/', f))]

trainData = []
valData = []
testData = []

for i, fileName in enumerate(files):
    if i < 120:
        trainData.append(fileName)
    elif i < 160:
        valData.append(fileName)
    else:
        testData.append(fileName)


df = pd.read_csv(trainData[1])

print(df.iloc[3])

ticker              2000-06-13
ISRG US Equity           2.007
ISRG US Equity.1           2.0
ISRG US Equity.2           NaN
ISRG US Equity.3           NaN
ISRG US Equity.4           NaN
Predictions               DOWN
OneHotPred                   0
Name: 3, dtype: object


In [2]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import math

class CustomImageDataset(Dataset):
    def __init__(self, files, transform=None, target=None):
        self.lengths = []
        self.data = []

        for f in files:
            df = pd.read_csv(f)
            df = df.dropna()
            df = df[df.OneHotPred != 'X']
            df = df[df.OneHotPred != 'UP']
            df = df[df.OneHotPred != 'DOWN']
            #df = df[df[5] != 'UP']
            self.lengths.append(len(df) - 2)
            self.data.append(df)

        self.transforms = transform
        self.target = target

    def __len__(self):
        return sum(self.lengths) - 5

    def __getitem__(self, idx):
        
        i = 0
        while idx > self.lengths[i]:
            idx -= self.lengths[i]
            i += 1
        
        rowsBack = 5
        quiz = torch.zeros(25)
        label = torch.zeros(1)
        quizI = 0

        while rowsBack > 0:

            if i < 0:
                i -= 1
                idx = self.lengths[i]
            
            row = self.data[i].iloc[idx + 1]
            for p in range(5):
                if row[p + 1] == 'UP' or row[p + 1] == 'DOWN':
                    quiz[quizI] = torch.tensor(float(24))
                else:
                    quiz[quizI] = torch.tensor(float(row[p + 1]))
                    
                quizI += 1

            if rowsBack == 5:
                try:
                    label = torch.tensor(int(row[7]))
                except:
                    label = torch.tensor(int(1))

            rowsBack -= 1

        quiz = quiz.div(quiz.norm())

        return quiz, label
        

trainDataset = CustomImageDataset(trainData)
valDataset = CustomImageDataset(valData)
testDataset = CustomImageDataset(testData)
        
train_dataloader = DataLoader(trainDataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(valDataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(testDataset, batch_size=16, shuffle=True)


In [3]:
from torch import nn
from torch.nn import functional as F

class Net(nn.Module):
    def __init__(self, input_shape):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_shape, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x


In [14]:
import sys
learning_rate = 0.01
epochs = 5

model = Net(input_shape=torch.rand(25).shape[0])
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.BCELoss()

losses = []
accur = []
for i in range(epochs):
  accs = []
  for j,(x_train,y_train) in enumerate(train_dataloader):
    
    #calculate output
    output = model(x_train)

    #output = torch.FloatTensor(output.squeeze())
    y_train = y_train.type(torch.FloatTensor)
    output = output.squeeze().type(torch.FloatTensor)
    #calculate loss
    loss = loss_fn(output, y_train)

    #print(output)
    #accuracy
    accL = [int(round(o.item()) == i.item()) for o, i in zip(output.reshape(-1,1), y_train)]
    accs.append(sum(accL)/len(accL))
    #acc = (output.reshape(-1,1).detach().numpy().round() == y_train)
    #backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if i%1 == 0:
    losses.append(loss)
    accur.append(sum(accs)/len(accs))
    acc = sum(accur) / len(accur)
    print("epoch {}\tloss : {}\t accuracy : {}".format(i,loss,acc))

epoch 0	loss : 0.6860936284065247	 accuracy : 0.5120161261188032


KeyboardInterrupt: 

In [None]:
import torch
k = torch.rand(25)
k[0] = torch.tensor(300)
print(k)
print(k.shape)
k.div(k.norm())

tensor([3.0000e+02, 2.9678e-01, 4.8754e-02, 8.8310e-01, 7.1240e-01, 9.9613e-01,
        3.3590e-01, 5.3742e-02, 9.1234e-01, 9.2840e-01, 2.8780e-01, 2.2411e-01,
        5.0788e-03, 2.4976e-01, 7.6758e-01, 1.0515e-01, 6.2628e-01, 8.6017e-01,
        2.2847e-01, 1.1713e-01, 3.3046e-01, 7.3386e-01, 9.7360e-01, 2.3771e-01,
        4.1323e-01])
torch.Size([25])


tensor([9.9996e-01, 9.8921e-04, 1.6251e-04, 2.9435e-03, 2.3745e-03, 3.3203e-03,
        1.1196e-03, 1.7913e-04, 3.0410e-03, 3.0945e-03, 9.5929e-04, 7.4700e-04,
        1.6929e-05, 8.3251e-04, 2.5585e-03, 3.5048e-04, 2.0875e-03, 2.8671e-03,
        7.6155e-04, 3.9042e-04, 1.1015e-03, 2.4461e-03, 3.2452e-03, 7.9232e-04,
        1.3774e-03])

In [None]:
df = pd.read_csv('./data/A.csv')
df = df.dropna()
df = df[df.OneHotPred != 'X']
df

Unnamed: 0,ticker,A US Equity,A US Equity.1,A US Equity.2,A US Equity.3,A US Equity.4,Predictions,OneHotPred
170,2000-08-31,41.1,41.5211,40.7631,39.0728,50.0361,UP,1
171,2000-09-01,41.6896,40.8895,41.2179,38.8858,50.1435,DOWN,0
172,2000-09-05,41.6475,41.5632,41.6475,38.7064,50.2035,DOWN,0
173,2000-09-06,40.4262,41.5632,41.2432,38.5026,50.2696,UP,1
174,2000-09-07,41.6922,40.4262,41.3111,38.3199,50.3298,DOWN,0
...,...,...,...,...,...,...,...,...
5762,2022-11-22,156.86,151.98,147.95,133.5466,129.2391,DOWN,0
5763,2022-11-23,155.35,156.67,149.596,133.9886,129.3118,UP,1
5764,2022-11-25,156.96,155.09,152.1,134.4646,129.3744,DOWN,0
5765,2022-11-28,152.3,155.87,153.322,134.9128,129.4276,DOWN,0


In [90]:
df[5]

KeyError: 5