In [1]:
import torch
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from y_engineering import binarise

In [2]:
def kelly_criterion(win_pct, wl_ratio):
  return win_pct - (1-win_pct)/wl_ratio

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [4]:
data = pd.read_csv('/content/Binance_BTCUSDT_d.csv')

In [5]:
data.head()

Unnamed: 0,Unix,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,1674000000000.0,18/1/2023,BTCUSDT,21132.29,21650.0,20407.15,20677.47,350916.0195,7399719000.0,7617173
1,1673910000000.0,17/1/2023,BTCUSDT,21185.65,21647.45,20841.31,21134.81,275407.7441,5840586000.0,6440138
2,1673830000000.0,16/1/2023,BTCUSDT,20872.99,21474.05,20611.48,21185.65,293078.0826,6168654000.0,6856434
3,1673740000000.0,15/1/2023,BTCUSDT,20952.76,21050.74,20551.01,20871.5,178542.2255,3710527000.0,5194354
4,1673650000000.0,14/1/2023,BTCUSDT,19930.01,21258.0,19888.05,20954.92,393913.7495,8183072000.0,8659545


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1981 entries, 0 to 1980
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unix         1981 non-null   float64
 1   Date         1981 non-null   object 
 2   Symbol       1981 non-null   object 
 3   Open         1981 non-null   float64
 4   High         1981 non-null   float64
 5   Low          1981 non-null   float64
 6   Close        1981 non-null   float64
 7   Volume BTC   1981 non-null   float64
 8   Volume USDT  1981 non-null   float64
 9   tradecount   1981 non-null   int64  
dtypes: float64(7), int64(1), object(2)
memory usage: 154.9+ KB


In [119]:
y = binarise(data, 'Close', threshold = 0)

In [121]:
y.sum(), y.sum()/len(y)

(956, 0.48258455325593136)

In [8]:
def get_ordered_index(df, period_to_skip=100, val_split_ratio=0.2, test_split_ratio=0.2,  period_from_end_skip=0):
    # get ordered dates split into train, val and test based on index
    all_dates = list(df.index)
    all_train = int(len(all_dates)*(1-test_split_ratio))
    train = int(all_train*(1-val_split_ratio))

    train_dates = all_dates[period_to_skip:train]
    val_dates = all_dates[train:all_train]
    test_dates = all_dates[all_train:-1 - period_from_end_skip]
    return train_dates, val_dates, test_dates

In [9]:
def get_x(df, y_index, x_col = 'Close', period=21):
  return df.loc[y_index-period:y_index-1, x_col]

In [10]:
def get_xy(df, y, period, x_col = 'Close', val_split_ratio=0.2, test_split_ratio=0.2):
  x_traini, x_vali, x_testi = get_ordered_index(data, period_to_skip=period+1, val_split_ratio=0.2, test_split_ratio=0.2)
  x_train = np.zeros((len(x_traini),period))
  x_val = np.zeros((len(x_vali),period))
  x_test = np.zeros((len(x_testi),period))

  y_train = np.array(y[x_traini])
  y_val = np.array(y[x_vali])
  y_test = np.array([x_testi])

  for i in range(len(x_traini)):
    temp = np.array(get_x(df, x_traini[i], x_col = x_col, period=period))
    x_train[i, :] = temp
  
  for i in range(len(x_vali)):
    temp = np.array(get_x(df, x_vali[i], x_col = x_col, period=period))
    x_train[i, :] = temp
  
  for i in range(len(x_testi)):
    temp = np.array(get_x(df, x_testi[i], x_col = x_col, period=period))
    x_train[i, :] = temp

  return x_train, y_train, x_val, y_val, x_test, y_test


In [11]:
x_train, y_train, x_val, y_val, x_test, y_test = get_xy(data, y, 21, x_col = 'Close', val_split_ratio=0.2, test_split_ratio=0.2)

In [12]:
x_train.shape, y_train.shape, x_val.shape, x_test.shape

((1245, 21), (1245,), (317, 21), (396, 21))

In [15]:
def convert_to_tensor(x):
  device = "cuda" if torch.cuda.is_available() else "cpu"
  return [torch.tensor(i).to(device) for i in x]

In [16]:
temp = [x_train, y_train, x_val, y_val, x_test, y_test]
x_train, y_train, x_val, y_val, x_test, y_test = convert_to_tensor(temp)

In [17]:
type(x_train)

torch.Tensor

In [53]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, X, y, transform=None, target_transform=None):
        self.x = X
        self.y = y
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        x = self.x[idx,:].type(torch.float)
        y = self.y[idx].type(torch.float)
        if self.transform:
            x = self.transform(x)
        if self.target_transform:
            y = self.target_transform(y)
        return torch.reshape(x, (1, -1)), y

In [54]:
x_train_ds = CustomDataset(x_train, y_train)
x_val_ds = CustomDataset(x_val, y_val)

In [55]:
BATCH_SIZE = 32
train_dataloader = DataLoader(x_train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(x_val_ds, batch_size=BATCH_SIZE, shuffle=True)

In [108]:
for X, y in test_dataloader:
  print(X.shape, y.shape)

torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([32, 1, 21]) torch.Size([32])
torch.Size([29, 1, 21]) torch.Size([29])


In [57]:
class basic_cnn(nn.Module):
  """
  Basic cnn model with 3 convolutional layers followed by 3 dense layers
  """

  def __init__(self):
      super(basic_cnn, self).__init__()
      self.conv1 = nn.Conv1d(1, 6, 5)
      self.conv2 = nn.Conv1d(6, 16, 3)
      self.conv3 = nn.Conv1d(16, 32, 3)
      # self.maxpool = nn.MaxPool1d(2, 2)
      
      self.fc1 = nn.Linear(416 , 64)
      self.fc2 = nn.Linear(64, 32)
      self.fc3 = nn.Linear(32, 1)

  def forward(self, x):
      x = self.conv1(x)
      x = nn.ReLU()(x)
      # x = self.maxpool(x)

      x = self.conv2(x)
      x = nn.ReLU()(x)
      # x = self.maxpool(x)

      x = self.conv3(x)
      x = nn.ReLU()(x)
      # x = self.maxpool(x)

      x = x.view(-1, self.num_flat_features(x))
      x = self.fc1(x)
      x = nn.ReLU()(x)
      x = self.fc2(x)
      x = nn.ReLU()(x)
      x = self.fc3(x)
      return x

  def num_flat_features(self, x):
      size = x.size()[1:]
      num_features = 1
      for s in size:
          num_features *= s
      return num_features

In [109]:
model = basic_cnn()
if torch.cuda.is_available():
    model.cuda()

In [110]:
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()}")

Layer: conv1.weight | Size: torch.Size([6, 1, 5])
Layer: conv1.bias | Size: torch.Size([6])
Layer: conv2.weight | Size: torch.Size([16, 6, 3])
Layer: conv2.bias | Size: torch.Size([16])
Layer: conv3.weight | Size: torch.Size([32, 16, 3])
Layer: conv3.bias | Size: torch.Size([32])
Layer: fc1.weight | Size: torch.Size([64, 416])
Layer: fc1.bias | Size: torch.Size([64])
Layer: fc2.weight | Size: torch.Size([32, 64])
Layer: fc2.bias | Size: torch.Size([32])
Layer: fc3.weight | Size: torch.Size([1, 32])
Layer: fc3.bias | Size: torch.Size([1])


In [117]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):        
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y.unsqueeze(1))
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # if batch % 5 == 0:
        #   loss, current = loss.item(), batch * len(X)
        #   print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    batch_num = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y.unsqueeze(1)).item()
            pred = (torch.sigmoid(pred) > 0.5).type(torch.float32)
            correct += (pred==y).sum().item()/BATCH_SIZE
            
    test_loss /= batch_num
    correct /= batch_num
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [118]:
learning_rate = 0.001
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0170], device='cuda:0')
tensor([0.], device='cuda:0')
Test Error: 
 Accuracy: 54.8%, Avg loss: 0.021840 

Epoch 2
-------------------------------
tensor([-0.0109], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0109], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0109], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([-0.0109], devic

In [None]:
with torch.no_grad():
    for X, y in test_dataloader:
      pred = model(X)
      print(torch.sigmoid(pred))