In [1]:
# !git clone https://github.com/booydar/algotrade
# !pip install einops entmax

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# !mkdir logs
# !mkdir checkpoints

In [4]:
import torch
import torch.nn as nn
import numpy as np
import random
import time
import sys
from sklearn.model_selection import train_test_split

# sys.path.append('algotrade/NN')
# from algotrade.NN.x_transformers.x_transformers import *
# from algotrade.NN.run_experiment import *
# from algotrade.NN.generate_data import *

sys.path.append('NN')
from x_transformers.NN.x_transformers import *
from x_transformers.NN.x_transformers.x_transformers import *
from x_transformers.NN.run_experiment import *
# from x_transformers.NN.generate_data import *

## Variables

In [5]:
from sklearn.model_selection import ParameterGrid

TAG = 'test'

TASK_NAME = 'price'
TRAIN_SIZE = 100_000
VAL_SIZE = 2_000
TEST_SIZE = 10_000
NUM_INITS = 4


NUM_BATCHES = int(4e5)
BATCH_SIZE = 128
GENERATE_EVERY  = 10000
NUM_TOKENS = 10 + 2
ENC_SEQ_LEN = 24
DEC_SEQ_LEN = 48

INPUT_LEN = 24

#### Load data

In [24]:
class data_loader:
    def __init__(self, mode, path='data', tgt_len=24, batch_size=32, tgt_dim=2, device='cpu'):
        X, y = np.load(f'{path}/X_{mode}.npy'), np.load(f'{path}/y_{mode}.npy')        
        X = torch.tensor(X)

        slices_x = [X[i:tgt_len + i] for i in range(X.shape[0] - tgt_len)]
        src = torch.stack(slices_x)
        tgt = y[tgt_len-1:-1]
        
        if tgt_dim is not None:
            tgt = tgt[:, [0, tgt_dim]]
        
        perm_ind = torch.randperm(src.shape[0])
        src, tgt = src[perm_ind], tgt[perm_ind]
        self.src, self.tgt = torch.tensor(src).float(), torch.tensor(tgt).float()

        self.data_size = self.src.shape[0]
        self.data_ptr = 0

        self.batch_size = batch_size
        self.device = device

    def __next__(self):
        if self.data_ptr + self.batch_size > self.data_size:
            self.data_ptr = 0

        src = self.src[self.data_ptr: self.data_ptr + self.batch_size].to(device=self.device)
        tgt = self.tgt[self.data_ptr: self.data_ptr + self.batch_size].to(device=self.device)
        
        src_mask = tgt_mask = None
            
        self.data_ptr = (self.data_ptr + self.batch_size) % self.data_size

        return src, tgt, src_mask, tgt_mask

### Run

In [25]:

gen_train = data_loader(path=f'data/BTCUSD', mode='train', batch_size=BATCH_SIZE, device='cuda')
gen_val = data_loader(path=f'data/BTCUSD', mode='val', batch_size=BATCH_SIZE, device='cuda')
gen_test = data_loader(path=f'data/BTCUSD', mode='test', batch_size=BATCH_SIZE, device='cuda')

  self.src, self.tgt = torch.tensor(src).float(), torch.tensor(tgt).float()


In [30]:
class CXTransformer(nn.Module):
    def __init__(
        self,
        *,
        dim,
        tie_token_emb = False,
        **kwargs
    ):
        super().__init__()
        enc_kwargs, kwargs = groupby_prefix_and_trim('enc_', kwargs)
        dec_kwargs, kwargs = groupby_prefix_and_trim('dec_', kwargs)
        
        assert 'dim' not in enc_kwargs and 'dim' not in dec_kwargs, 'dimension of either encoder or decoder must be set with `dim` keyword'
        enc_transformer_kwargs = pick_and_pop(['max_seq_len', 'dim_in', 'use_pos_emb'], enc_kwargs)
        # enc_transformer_kwargs['num_memory_tokens'] = enc_kwargs.pop('num_memory_tokens', None)

        dec_transformer_kwargs = pick_and_pop(['max_seq_len', 'dim_in', 'dim_out'], dec_kwargs)

        self.encoder = ContinuousTransformerWrapper(
            **enc_transformer_kwargs,
            attn_layers = Encoder(dim = dim, **enc_kwargs)
        )

        self.decoder = ContinuousTransformerWrapper(
            **dec_transformer_kwargs,
            attn_layers = Decoder(dim = dim, cross_attend = True, **dec_kwargs)
        )

        if tie_token_emb:
            self.decoder.token_emb = self.encoder.token_emb

        self.decoder = AutoregressiveWrapper(self.decoder)

    @torch.no_grad()
    def generate(self, seq_in, seq_out_start, seq_len, src_mask = None, **kwargs):
        encodings = self.encoder(seq_in, return_embeddings = True, mask = src_mask)
        return self.decoder.generate(seq_out_start, seq_len, context = encodings, context_mask = src_mask, **kwargs)

    def forward(self, src, tgt, src_mask = None, tgt_mask = None):
        enc = model.encoder(src, mask = src_mask, return_embeddings = True)
    
        gen_token = -10_000 * torch.ones_like(src[:, :1, :])

        out = model.decoder.net(gen_token, context=enc)
        xo = tgt[:, 1:]
        loss = F.mse_loss(out.transpose(1, 2)[:, 0], xo)
        return loss

In [31]:
LEARNING_RATE = 0.0001

model_parameters = ParameterGrid({'dim': [128],
    'tie_token_embeds': [True],
    'return_tgt_loss': [True],
    'enc_depth': [2],
    'enc_heads': [4],
    'dec_depth': [2],
    'dec_heads': [4],
    'enc_max_seq_len': [24],
    'dec_max_seq_len': [1],
    'enc_num_memory_tokens': [0],
    'enc_dim_in': [16],
    'dec_dim_in': [16],
    'enc_dim_out': [1],
    'dec_dim_out': [1],
    'enc_emb_dim': [128],
    'enc_emb_dropout': [0.],
    'enc_use_pos_emb': [False]
})

param = list(model_parameters)[0]


In [56]:
WINDOW_SIZE = 4
PATIENCE = 10
def train_validate_model(model, train_generator, val_generator, optim, model_name, config, generate_every=1e2, num_batches=1e3, verbose=True, overfit_stop=True, print_file=None, tag='', log_path='logs/', head_start=15):
    
    t0 = time.time()
    
    log_dir = log_path + model_name.split('_')[0]
    writer = SummaryWriter(log_dir=log_dir)
    if print_file is None:
        print_file = f"{log_dir}/{model_name}_cout_log.txt"

    validation_scores = []
    for i in range(num_batches):

        model.train()
        
        src, tgt, src_mask, tgt_mask = next(train_generator)
        loss = model(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask)
        loss.backward()

        loss_value = loss.item()        
        writer.add_scalars("/train/loss", {model_name: loss_value}, i)
#         if loss_value < 1e-10:
#             break

        optim.step()
        optim.zero_grad()

        if i != 0 and i % generate_every == 0:
            model.eval()
            
            with torch.no_grad():
                src, tgt, src_mask, tgt_mask = next(val_generator)
                
                enc = model.encoder(src, mask = src_mask, return_embeddings = True)
    
                gen_token = -10_000 * torch.ones_like(src[:, :1, :])

                out = model.decoder.net(gen_token, context=enc)
                xo = tgt[:, 1:]
                val_loss = F.mse_loss(out.transpose(1, 2)[:, 0], xo)
                val_loss_value = val_loss.item()

            writer.add_scalars("/val/loss", {model_name: val_loss_value}, i)

            validation_scores.append(val_loss_value) 
    
            if verbose:
                with open(print_file, 'a') as f:
                    f.write(f"\n\ninput:  {src[0][-5:, 0]}")
                    f.write(f"\npredicted output:  {out[0]}")
                    f.write(f"\ncorrect output:  {xo[0]}")
                    f.write(f"\ntime: {round(time.time() - t0)}")
                    t0 = time.time()
            
            # save checkpoint
            if max(validation_scores) == validation_scores[-1]:
                os.system(f'mkdir {log_path}checkpoints')
                os.system(f'mkdir {log_path}checkpoints/{model_name.split("_")[0]}')
                os.system(f'mkdir {log_path}checkpoints/{model_name.split("_")[0]}/validation')
                save_path = f'{log_path}checkpoints/{model_name.split("_")[0]}/validation/{model_name}_{tag}_maxval.pt'
                save_checkpoint(save_path, model, optim, i, config)
                
            if i // generate_every < head_start:
                continue
                
            # early stopping
            smoothed_val_scores = [np.mean(validation_scores[i-WINDOW_SIZE+1:i]) for i in range(WINDOW_SIZE-1, len(validation_scores))]
            
            if overfit_stop and max(smoothed_val_scores) > max(smoothed_val_scores[-PATIENCE:]):
                break
                
    # save checkpoint
    save_path = f'{log_path}checkpoints/{model_name.split("_")[0]}/{model_name}_{tag}.pt'
    os.system(f'mkdir {log_path}checkpoints/{model_name.split("_")[0]}')
    save_checkpoint(save_path, model, optim, i, config)

    writer.flush()


def test_model(model, test_generator, model_name, param, task_name, tag, num_batches=50, log_path='logs/_test_results.csv'):
    model.eval()

    loss_values = []
    with torch.no_grad():
        for bn in range(num_batches):
            src, tgt, src_mask, tgt_mask = next(test_generator)
            
            enc = model.encoder(src, mask = src_mask, return_embeddings = True)

            gen_token = -10_000 * torch.ones_like(src[:, :1, :])

            out = model.decoder.net(gen_token, context=enc)
            xo = tgt[:, 1:]
            loss = F.mse_loss(out.transpose(1, 2)[:, 0], xo)
            loss_values.append(loss.cpu().item())

    param['tag'] = tag
    param['task_name'] = task_name
    param['model_name'] = model_name
    param['loss'] = np.mean(loss_values)

    if os.path.exists(log_path):
        df = pd.read_csv(log_path)
        df = df.append(param, ignore_index=True)
    else: 
        df = pd.DataFrame([param])
    df.to_csv(log_path, index=False) 

In [57]:
GENERATE_EVERY = 100
NUM_BATCHES = 1000

In [58]:
drive_path = 'stocks_logs/'
print_file = f'{drive_path}{TAG}_logs.txt'
t = time.time()
for init_num in range(1):
    with open(print_file, 'a') as f:
        f.write('\n\nInit number ' + str(init_num)+'\n')
    for i, param in enumerate(list(model_parameters)):
        with open(print_file, 'a') as f:
            f.write('\n\n' + str(param)+'\n')
        # param['enc_depth'], param['enc_heads'] = param['depth,heads']
        # param['dec_depth'], param['dec_heads'] = param['depth,heads']
        # param.pop('depth,heads')

        with open(print_file, 'a') as f:
            f.write(f'{i / len(model_parameters) * 100}%')
        model = CXTransformer(**param).cuda()

        model_name = f"{TASK_NAME}{INPUT_LEN}_dim{param['dim']}d{param['enc_depth']}h{param['enc_heads']}M{param['enc_num_memory_tokens']}l{param['enc_max_seq_len']}_{TAG}_v{init_num}"

        optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
        train_validate_model(model, 
                        train_generator=gen_train, 
                        val_generator=gen_val, 
                        optim=optim, 
                        model_name=model_name, 
                        config=param,
                        num_batches=NUM_BATCHES,
                        generate_every=GENERATE_EVERY,
                        print_file=print_file,
                        tag=TAG,
                        overfit_stop=False)
        test_model(model, gen_test, model_name, param, TASK_NAME, tag=TAG, log_path=drive_path+'test_results.csv')
        with open(print_file, 'a') as f:
            f.write(f'\nTotal time: {time.time() - t}\n')
        t = time.time()