In [166]:
#data essentials
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math

#self created tools
from getting_data import *
from Indicators import *
from y_engineering import *
from metric import *
from models import *

# PyTorch model and training necessities
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

#sklearn
from sklearn.metrics import *

#visualisation with tensorboard
from torch.utils.tensorboard import SummaryWriter

In [2]:
# use gpu if available; otherwise fall back to cpu so that `device` is always
# defined (later cells pass it to torch.Generator)
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    # make newly created tensors default to CUDA float32
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [3]:
# obtaining data
# (the path is relative, so it is resolved against the notebook's working directory)
df = pd.read_csv("appl_clean_train.csv")

In [4]:
# adding some features
# target: 1 when sma(..., period=5) shifted by -5 rows is greater than the current 'Adj Close'
# NOTE(review): assuming sma returns a pandas Series, the last 5 rows are NaN after the
# shift, compare as False and therefore get label 0 — confirm those rows are excluded downstream
df['y_binary_sma5'] = (sma(df, period=5, column='Adj Close').shift(periods=-5) > df['Adj Close']).astype(int)
# presumably ema(..., inplace=True) adds the 'ema_21' / 'ema_200' columns read below — TODO confirm
ema(df, period=21, column='Adj Close', inplace=True)
ema(df, period=200, column='Adj Close', inplace=True)
# change of each EMA column as computed by pct_log with pct=True, log=False, time_to_pred=1
df["ema_pct_21"] = pct_log(df, y_col='ema_21', time_to_pred = 1, pct=True, log=False)
df["ema_pct_200"] = pct_log(df, y_col='ema_200', time_to_pred = 1, pct=True, log=False)

In [5]:
df

Unnamed: 0,Adj Close,Volume,pct_log,pct,y_binary_sma5,ema_21,ema_200,ema_pct_21,ema_pct_200
0,0.789849,459177600,,,1,0.789849,0.789849,,
1,0.809797,597643200,-0.105724,0.025255,1,0.800298,0.799873,0.013229,0.012691
2,0.862517,1831132800,-0.298956,0.065103,0,0.823043,0.820963,0.028420,0.026367
3,0.845894,495924800,0.131580,-0.019273,0,0.829596,0.827290,0.007963,0.007706
4,0.807423,440876800,0.278119,-0.045480,1,0.824279,0.823237,-0.006410,-0.004899
...,...,...,...,...,...,...,...,...,...
4996,65.139717,84020400,0.004179,0.017534,0,63.019752,52.204988,0.003375,0.002496
4997,64.631065,105207600,-0.001877,-0.007809,0,63.166235,52.328630,0.002324,0.002368
4998,65.499199,65235600,0.003201,0.013432,0,63.378322,52.459681,0.003358,0.002504
4999,65.354912,46617600,-0.000527,-0.002203,0,63.558012,52.587992,0.002835,0.002446


In [6]:
def get_xy(df, period, x_col = ['pct'], y_col='pct', val_pct=0.2):
    """
    Build windowed feature arrays and targets for training and validation.

    df: DataFrame holding the feature and target columns
    period: how long we want the x values to go back (window length)
    x_col: list of column names used as features, one channel per column
    y_col: name of the column holding the target
    val_pct: forwarded to get_ordered_index (presumably the validation
        fraction — confirm against that helper)

    returns: (x_train, y_train, x_val, y_val) as numpy arrays, where the x
    arrays have shape (n_samples, len(x_col), period) and the y arrays have
    shape (n_samples, 1)
    """

    # forward the caller's val_pct instead of a hard-coded 0.2
    train_index, val_index = get_ordered_index(df, period_to_skip=period+1, val_pct=val_pct)

    y_train = np.array(df.loc[train_index, y_col])
    y_val = np.array(df.loc[val_index, y_col])

    def _stack_windows(indices):
        # one (len(x_col), period) window per index, filled channel by channel
        windows = np.zeros((len(indices), len(x_col), period))
        for j, x_column in enumerate(x_col):
            for i, row_i in enumerate(indices):
                windows[i, j, :] = np.array(get_x(df, row_i, x_col = x_column, period=period))
        return windows

    x_train = _stack_windows(train_index)
    x_val = _stack_windows(val_index)

    # targets get a trailing axis so they line up with a (batch, 1) model output
    return x_train, np.expand_dims(y_train,1), x_val, np.expand_dims(y_val,1)


In [7]:
def convert_to_tensor(x):
    """
    Convert every element of x into a float32 tensor on the available device.

    x: iterable of array-likes (e.g. the tuple returned by get_xy)
    returns: list of torch.float32 tensors, on "cuda" when available, else "cpu"
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # set dtype/device directly: casting to torch.cuda.FloatTensor fails on
    # machines without CUDA, which defeated the cpu fallback above
    return [torch.tensor(i, dtype=torch.float32, device=device) for i in x]

# 14-step windows over three feature columns, predicting the binary SMA target
temp = get_xy(
    df,
    14,
    x_col=["pct", "ema_pct_21", "ema_pct_200"],
    y_col="y_binary_sma5",
    val_pct=0.2,
)
x_train, y_train, x_val, y_val = convert_to_tensor(temp)

In [167]:
x_train.shape, y_train.shape

(torch.Size([3985, 3, 14]), torch.Size([3985, 1]))

In [11]:
# wrap the tensors in datasets and serve them in shuffled mini-batches

batch_size = 64
x_train_ds = CustomDataset(x_train, y_train)
x_val_ds = CustomDataset(x_val, y_val)
# each loader gets its own generator created on `device`
train_dataloader = DataLoader(
    x_train_ds,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator(device=device),
)
val_dataloader = DataLoader(
    x_val_ds,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator(device=device),
)

In [155]:
# checking data shapes on one training batch: feature shape, batch length,
# and the features / label of the sample at position 1
# (next(iter(...)) draws a batch through the loader's shuffle generator)

temp = next(iter(train_dataloader))
temp[0].shape, temp[1].shape[0], temp[0][1, :, :], temp[1][1]

(torch.Size([64, 3, 14]),
 64,
 tensor([[-0.0054,  0.0135,  0.0082,  0.0047,  0.0182, -0.0158, -0.0147, -0.0271,
           0.0304, -0.0019, -0.0225, -0.0063,  0.0062,  0.0188],
         [ 0.0006,  0.0018,  0.0024,  0.0026,  0.0041,  0.0022,  0.0006, -0.0019,
           0.0010,  0.0007, -0.0014, -0.0019, -0.0012,  0.0006],
         [ 0.0015,  0.0016,  0.0017,  0.0017,  0.0019,  0.0017,  0.0015,  0.0012,
           0.0015,  0.0015,  0.0012,  0.0011,  0.0012,  0.0013]]),
 tensor([1.]))

In [169]:
class indicator_cnn(nn.Module):
    """
    Small 1D CNN that maps a window of indicator features to a single logit.

    The layer sizes fit inputs of shape (batch, 3, 14): conv1 (kernel 5)
    shortens the length 14 -> 10 and conv2 (kernel 3) shortens it 10 -> 8,
    which gives the 16 * 8 flattened features consumed by fc1.
    forward() returns raw logits; predict() applies the sigmoid.
    """

    def __init__(self):
        super(indicator_cnn, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv1d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 8, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits of shape (batch, 1)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # flatten (channels, length) into one feature axis for the dense layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def predict(self, x, return_logits=True, threshold = 0.5):
        """
        Run the model without gradient tracking and return numpy results.

        x: input tensor accepted by forward()
        return_logits: when True, return the sigmoid outputs (values in
            [0, 1]); the parameter name is kept for backward compatibility
        threshold: cut-off applied to the sigmoid outputs to produce 0/1
            labels when return_logits is False

        returns: numpy array of shape (batch, 1)
        """
        # the sigmoid must run on the tensor: torch.sigmoid rejects numpy input
        with torch.no_grad():
            probs = torch.sigmoid(self.forward(x))
        # move to host memory before converting (needed for CUDA tensors)
        probs = probs.cpu().numpy()
        if return_logits:
            return probs
        return (probs > threshold).astype("int")

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all non-batch dims)."""
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

# place the model on the GPU when one exists instead of unconditionally calling .cuda()
model = indicator_cnn().to("cuda" if torch.cuda.is_available() else "cpu")

In [143]:
# binary cross-entropy computed on raw logits, optimised with Adam
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
)

In [144]:
# TensorBoard logging setup.
# To view the logged curves, start TensorBoard on the command line with:
#   tensorboard --logdir=runs
# and open a browser tab at http://localhost:6006/
from datetime import datetime

writer = SummaryWriter('runs/basic_indicator_cnn')
epoch_number = 0
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

epochs = 40

In [163]:
# train for `epochs` passes, logging train/validation loss to TensorBoard after each one
for t in range(epochs):
    print(f"Epoch {t+1} ------------------------>  ", end='')
    avg_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    # the validation loader is created as `val_dataloader`; `validation_dataloader`
    # is not defined by any cell, so it raised NameError on a fresh kernel
    avg_vloss, correct = test_loop(val_dataloader, model, loss_fn, batch_size=batch_size)

    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    t + 1)
    # flush each epoch so the curves show up while training is still running
    writer.flush()
print("Done!")

Epoch 1 ------------------------>  Avg training loss: 0.009772, Avg val loss: 0.011362, Validation accuracy: 53.5% 

Epoch 2 ------------------------>  Avg training loss: 0.009723, Avg val loss: 0.011391, Validation accuracy: 54.2% 

Epoch 3 ------------------------>  Avg training loss: 0.009738, Avg val loss: 0.011379, Validation accuracy: 54.6% 

Epoch 4 ------------------------>  Avg training loss: 0.009667, Avg val loss: 0.011366, Validation accuracy: 55.5% 

Epoch 5 ------------------------>  Avg training loss: 0.009681, Avg val loss: 0.011386, Validation accuracy: 54.1% 

Epoch 6 ------------------------>  Avg training loss: 0.009642, Avg val loss: 0.011357, Validation accuracy: 54.6% 

Epoch 7 ------------------------>  Avg training loss: 0.009609, Avg val loss: 0.011450, Validation accuracy: 53.5% 

Epoch 8 ------------------------>  Avg training loss: 0.009631, Avg val loss: 0.011412, Validation accuracy: 53.0% 

Epoch 9 ------------------------>  Avg training loss: 0.009579, 