Хотел обучить какую нибудь простую модельку, не вышло, работает слишком плохо, не успел придумать ничего другого.

Идея была следующая - обучить 2 модельки, которые предсказывают отдельно ask и bid цены. Если следующая bid цена будет выше предыдущей ask цены (при условии что разница цен больше 2 комиссий), то мы покупаем на данном шаге. Если наоборот, то продаем.
Идем окном размера N по ценам и подаем в модель $[x_{k-N}, ..., x_{k-1}]$ и предсказываем $x_k$

In [1]:
import torch
from torch import nn, Tensor
from torch.utils.data import DataLoader, Dataset, TensorDataset
from typing import Any, List, Tuple
from sklearn.metrics import mean_squared_error
import numpy as np

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/

/content/drive/MyDrive


In [5]:
import json
with open('bitmex.json') as f:
    ask = np.array(json.load(f))
len(ask)

6036368

In [57]:
from tqdm import tqdm
def run_epoch(model, dataloader, criterion, device='cpu', optimizer=None, do_train=True, scheduler=None):
    loss_log, preds, gts = [], [], []
    prefix = ['Val', 'Train'][do_train] + ' - {}'
    model.train(do_train)

    for x_batch, y_batch in tqdm(dataloader, position=0):
        data = x_batch.to(device)
        target = y_batch.to(device)

        if do_train:
            optimizer.zero_grad()

        with torch.inference_mode(not do_train):
            output = model(data)
            loss = criterion(output, target).cpu()
            
        preds.append(output.detach().cpu())
        gts.append(y_batch.detach().cpu())
        loss_log.append(loss.item())

        if not do_train:
            continue
    
        loss.backward()
        optimizer.step()

        if scheduler:
            scheduler.step()
    return torch.cat(gts), torch.cat(preds), loss_log
    
def train(model, dataloaders, optimizer, criterion, n_epochs, device='cpu', scheduler=None, submit=False):
    prefix = 'Full val - {}'
    for epoch in tqdm(range(n_epochs), position=0):
        print("Epoch {0} of {1}".format(epoch, n_epochs))

        train_targets, train_preds, train_loss = run_epoch(
            model=model,
            dataloader=dataloaders['train'],
            criterion=criterion, 
            optimizer=optimizer,
            do_train=True,
            scheduler=scheduler,
            device=device,
        )
        print("Train MSE: ", mean_squared_error(train_targets, train_preds), end='   ')
        if not submit:
            val_targets, val_preds, val_loss = run_epoch(
                model=model,
                dataloader=dataloaders['val'],
                criterion=criterion, 
                optimizer=None,
                do_train=False,
                scheduler=None,
                device=device,
            )
            print("Val MSE: ", mean_squared_error(val_targets, val_preds))

def inference(model, dataloader, device='cpu'):
    preds = []
    model.eval()
    for x_batch in tqdm(dataloader):
        data = x_batch[0].to(device)
        with torch.inference_mode():
            output = model(data)
            preds.append(output.detach().cpu())
            
    return torch.cat(preds)

In [58]:
class CustomDataset(Dataset):
    """
    Dataset for sampling stock data 
    Attributes:
        stock: List of stock prices.
        window: Number of previous prices to predict next.
    """
    def __init__(self, stock, window=10) -> None:
        self.stock = stock
        self.window = window
        
    def __len__(self) -> int:
        return len(self.stock) - self.window
    
    def __getitem__(self, idx) -> Tuple[Tensor, Tensor]:
        prices = torch.tensor(self.stock[idx:idx+self.window]).float()
        target = torch.tensor(self.stock[idx+self.window]).float()
        prices = prices.unsqueeze(1)
        return prices, target

Возьмем каждое 20 значение, разделим на train и val

In [99]:
ask_cp = ask[::20]

train_ask = ask_cp[:int(len(ask_cp)*0.8)]
val_ask = ask_cp[int(len(ask_cp)*0.8):]

train_ask_dataset = CustomDataset(train_ask)
val_ask_dataset = CustomDataset(val_ask)

train_ask_dataloader = DataLoader(
    train_ask_dataset,
    batch_size=64,
    pin_memory=True,
    num_workers=2,
    shuffle=False
)
val_ask_dataloader = DataLoader(
    val_ask_dataset, 
    batch_size=64, 
    pin_memory=True, 
    num_workers=2, 
    shuffle=False
)

ask_dataloaders = {'train': train_ask_dataloader, 'val': val_ask_dataloader}

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [100]:
class AskNet(nn.Module):
    def __init__(self):
        super(AskNet, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=16, num_layers=2, batch_first=True)
        self.linear = nn.Linear(16, 1)
        
    def forward(self, x):
        x, _  = self.lstm(x)
        out = self.linear(x[:,-1,:]).squeeze()
        return out

In [102]:
n_epochs = 30
ask_model = AskNet()
ask_model.to(device)
optimizer = torch.optim.Adam(ask_model.parameters(), lr=1e-2)
criterion = nn.MSELoss()
train(ask_model, ask_dataloaders, optimizer, criterion, n_epochs=n_epochs, device=device)

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 0 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.67it/s]


Train MSE:  527330370.0   

100%|██████████| 944/944 [00:03<00:00, 250.41it/s]
  3%|▎         | 1/30 [00:29<14:01, 29.01s/it]

Val acc:  487224670.0
Epoch 1 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.83it/s]


Train MSE:  498469820.0   

100%|██████████| 944/944 [00:03<00:00, 250.90it/s]
  7%|▋         | 2/30 [00:57<13:31, 28.99s/it]

Val acc:  459522780.0
Epoch 2 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.22it/s]


Train MSE:  470456000.0   

100%|██████████| 944/944 [00:03<00:00, 248.58it/s]
 10%|█         | 3/30 [01:27<13:04, 29.05s/it]

Val acc:  432643000.0
Epoch 3 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.72it/s]


Train MSE:  443260500.0   

100%|██████████| 944/944 [00:04<00:00, 213.41it/s]
 13%|█▎        | 4/30 [01:56<12:43, 29.36s/it]

Val acc:  406579840.0
Epoch 4 of 30


100%|██████████| 3773/3773 [00:24<00:00, 152.39it/s]


Train MSE:  416881180.0   

100%|██████████| 944/944 [00:05<00:00, 183.87it/s]
 17%|█▋        | 5/30 [02:26<12:19, 29.57s/it]

Val acc:  381332500.0
Epoch 5 of 30


100%|██████████| 3773/3773 [00:24<00:00, 152.61it/s]


Train MSE:  391318050.0   

100%|██████████| 944/944 [00:04<00:00, 222.00it/s]
 20%|██        | 6/30 [02:55<11:45, 29.38s/it]

Val acc:  356900350.0
Epoch 6 of 30


100%|██████████| 3773/3773 [00:25<00:00, 147.43it/s]


Train MSE:  366570370.0   

100%|██████████| 944/944 [00:03<00:00, 248.11it/s]
 23%|██▎       | 7/30 [03:25<11:16, 29.40s/it]

Val acc:  333284450.0
Epoch 7 of 30


100%|██████████| 3773/3773 [00:25<00:00, 150.23it/s]


Train MSE:  342639840.0   

100%|██████████| 944/944 [00:03<00:00, 252.66it/s]
 27%|██▋       | 8/30 [03:54<10:43, 29.23s/it]

Val acc:  310484060.0
Epoch 8 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.50it/s]


Train MSE:  319522200.0   

100%|██████████| 944/944 [00:03<00:00, 251.17it/s]
 30%|███       | 9/30 [04:23<10:13, 29.22s/it]

Val acc:  288497300.0
Epoch 9 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.95it/s]


Train MSE:  297219970.0   

100%|██████████| 944/944 [00:04<00:00, 235.65it/s]
 33%|███▎      | 10/30 [04:52<09:45, 29.27s/it]

Val acc:  267325000.0
Epoch 10 of 30


100%|██████████| 3773/3773 [00:25<00:00, 150.34it/s]


Train MSE:  275733440.0   

100%|██████████| 944/944 [00:05<00:00, 185.58it/s]
 37%|███▋      | 11/30 [05:23<09:21, 29.56s/it]

Val acc:  246968930.0
Epoch 11 of 30


100%|██████████| 3773/3773 [00:24<00:00, 154.85it/s]


Train MSE:  255061550.0   

100%|██████████| 944/944 [00:04<00:00, 201.48it/s]
 40%|████      | 12/30 [05:52<08:49, 29.42s/it]

Val acc:  227426690.0
Epoch 12 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.10it/s]


Train MSE:  235203730.0   

100%|██████████| 944/944 [00:03<00:00, 248.42it/s]
 43%|████▎     | 13/30 [06:21<08:18, 29.33s/it]

Val acc:  208696300.0
Epoch 13 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.19it/s]


Train MSE:  216159620.0   

100%|██████████| 944/944 [00:03<00:00, 248.46it/s]
 47%|████▋     | 14/30 [06:50<07:48, 29.27s/it]

Val acc:  190782100.0
Epoch 14 of 30


100%|██████████| 3773/3773 [00:25<00:00, 147.84it/s]


Train MSE:  197930800.0   

100%|██████████| 944/944 [00:03<00:00, 247.35it/s]
 50%|█████     | 15/30 [07:19<07:19, 29.30s/it]

Val acc:  173681390.0
Epoch 15 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.15it/s]


Train MSE:  180513980.0   

100%|██████████| 944/944 [00:03<00:00, 245.03it/s]
 53%|█████▎    | 16/30 [07:48<06:49, 29.27s/it]

Val acc:  157388980.0
Epoch 16 of 30


100%|██████████| 3773/3773 [00:25<00:00, 147.77it/s]


Train MSE:  163906350.0   

100%|██████████| 944/944 [00:04<00:00, 205.67it/s]
 57%|█████▋    | 17/30 [08:19<06:23, 29.54s/it]

Val acc:  141909060.0
Epoch 17 of 30


100%|██████████| 3773/3773 [00:24<00:00, 154.68it/s]


Train MSE:  148112850.0   

100%|██████████| 944/944 [00:05<00:00, 185.53it/s]
 60%|██████    | 18/30 [08:48<05:54, 29.53s/it]

Val acc:  127239576.0
Epoch 18 of 30


100%|██████████| 3773/3773 [00:24<00:00, 152.36it/s]


Train MSE:  133130536.0   

100%|██████████| 944/944 [00:04<00:00, 216.80it/s]
 63%|██████▎   | 19/30 [09:17<05:23, 29.42s/it]

Val acc:  113380680.0
Epoch 19 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.80it/s]


Train MSE:  118957816.0   

100%|██████████| 944/944 [00:03<00:00, 251.09it/s]
 67%|██████▋   | 20/30 [09:46<04:53, 29.34s/it]

Val acc:  100334130.0
Epoch 20 of 30


100%|██████████| 3773/3773 [00:25<00:00, 150.46it/s]


Train MSE:  105598410.0   

100%|██████████| 944/944 [00:03<00:00, 249.14it/s]
 70%|███████   | 21/30 [10:15<04:22, 29.21s/it]

Val acc:  88096610.0
Epoch 21 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.52it/s]


Train MSE:  93048210.0   

100%|██████████| 944/944 [00:03<00:00, 250.10it/s]
 73%|███████▎  | 22/30 [10:45<03:53, 29.21s/it]

Val acc:  76668250.0
Epoch 22 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.17it/s]


Train MSE:  81304350.0   

100%|██████████| 944/944 [00:03<00:00, 245.51it/s]
 77%|███████▋  | 23/30 [11:14<03:24, 29.20s/it]

Val acc:  66042964.0
Epoch 23 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.47it/s]


Train MSE:  70366390.0   

100%|██████████| 944/944 [00:04<00:00, 192.42it/s]
 80%|████████  | 24/30 [11:44<02:56, 29.50s/it]

Val acc:  56223220.0
Epoch 24 of 30


100%|██████████| 3773/3773 [00:24<00:00, 154.67it/s]


Train MSE:  60235350.0   

100%|██████████| 944/944 [00:05<00:00, 186.92it/s]
 83%|████████▎ | 25/30 [12:13<02:27, 29.49s/it]

Val acc:  47209984.0
Epoch 25 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.69it/s]


Train MSE:  50911030.0   

100%|██████████| 944/944 [00:03<00:00, 242.83it/s]
 87%|████████▋ | 26/30 [12:43<01:57, 29.39s/it]

Val acc:  38999292.0
Epoch 26 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.91it/s]


Train MSE:  42388824.0   

100%|██████████| 944/944 [00:03<00:00, 247.01it/s]
 90%|█████████ | 27/30 [13:12<01:27, 29.28s/it]

Val acc:  31589230.0
Epoch 27 of 30


100%|██████████| 3773/3773 [00:25<00:00, 148.92it/s]


Train MSE:  34667936.0   

100%|██████████| 944/944 [00:03<00:00, 250.53it/s]
 93%|█████████▎| 28/30 [13:41<00:58, 29.24s/it]

Val acc:  24975522.0
Epoch 28 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.72it/s]


Train MSE:  27745590.0   

100%|██████████| 944/944 [00:03<00:00, 248.92it/s]
 97%|█████████▋| 29/30 [14:10<00:29, 29.18s/it]

Val acc:  19159734.0
Epoch 29 of 30


100%|██████████| 3773/3773 [00:25<00:00, 149.11it/s]


Train MSE:  21621932.0   

100%|██████████| 944/944 [00:04<00:00, 213.14it/s]
100%|██████████| 30/30 [14:39<00:00, 29.33s/it]

Val acc:  14135380.0





In [103]:
ask_model.eval()
ask_model(torch.tensor(val_ask[:10]).float().unsqueeze(0).unsqueeze(2).cuda())

tensor(18955.6992, device='cuda:0', grad_fn=<SqueezeBackward0>)

In [104]:
val_ask[10]

22767.0

Моделька очень сильно ошибается, никуда не годится. Возможно стоило подумать в сторону RL методов.