In [1]:
%load_ext autoreload
%autoreload 2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import h5py
import numpy as np
import pandas as pd

from torch.utils.data import DataLoader
from sequencedataset import CandleSeriesDataset

In [2]:
#load data from hdf5 file and create a dataset
with h5py.File("data/nq17-23_1min_seq.hdf5", "r") as f:
    total_records = f["data"].shape[0]
    # start_index = int(total_records*0.75)
    # stop_index = int(total_records*0.80)
    dataset = f["data"][:] #[start_index:stop_index]


split_idx = int(len(dataset)*0.8)
train_data = dataset[:split_idx]
test_data = dataset[split_idx:]

train_dataset = CandleSeriesDataset(train_data)
test_dataset = CandleSeriesDataset(test_data)

batch_size = 256
#load data into dataloader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [3]:
import torch
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:x.size(1)].transpose(0, 1)  # Изменение размеров для соответствия формату batch_first
        return self.dropout(x)

class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, num_layers, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.pos_encoder = PositionalEncoding(embed_dim, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.output_layer = nn.Linear(embed_dim, vocab_size)
        self.embed_dim = embed_dim
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0, std=0.02)  

    def forward(self, src):
        src = self.embedding(src) * math.sqrt(self.embed_dim)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.output_layer(output)
        return output[:, -1, :] #return only last output


In [4]:
#set of model parameters
vocab_size = dataset.max()
embed_dim = 32
num_heads = 4
num_layers = 2
dropout = 0.1

#check if we have CUDA or MPS and setup respecive device, if not CUDA nor MPS is available, then use CPU
def init_model():
    device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')

    #device = torch.device('cpu')

    print(f"Device to be used: {device}")
    #Initialize model
    torch.manual_seed(42)
    model = TransformerModel(vocab_size, embed_dim, num_heads, num_layers, dropout)

    model = model.to(device)
    #print(model)
    #print model device
    next(model.parameters()).device
    return model, device
    

In [5]:
# # test model forward pass
# for idx, (data, target) in enumerate(train_loader):
#     model, device = init_model()
#     data = data.to(device)
#     target = target.to(device)
#     output = model(data)
#     print(output.shape)
#     print(data.shape)
#     print(target.shape)
#     break

In [16]:
#define writer for tensorboard
import os
delete_logs = True
if delete_logs:
    #use python os package to delete logs including files in subfolders and subfolders itself
    for root, dirs, files in os.walk('./runs/nq_llm_0'):
        for file in files:
            os.remove(os.path.join(root, file))
        for dir in dirs:
            for fils in os.listdir(os.path.join(root, dir)):
                os.remove(os.path.join(root, dir, fils))
            os.rmdir(os.path.join(root, dir))   
    
writer = SummaryWriter('runs/nq_llm_0')

In [6]:
#initialize model, loss function and optimizer
model, device = init_model()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
best_vloss = float('inf')

Device to be used: mps


In [48]:
# #caclulate number of parameters in the model
# def count_parameters(model):
#     return sum(p.numel() for p in model.parameters() if p.requires_grad)
# count_parameters(model)

349563

In [8]:
#restore model  and optimizer state from checkpoint './models/nq-llm_0.pth'

checkpoint = torch.load('./models/nq-llm_0.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
best_vloss = checkpoint['vloss']
#print optimizer state
for param_group in optimizer.param_groups:
    print(param_group['lr'])
    print(param_group['weight_decay'])





0.0001
1e-05


In [9]:
# direct adjust optimizer learning rate and weight decay

for g in optimizer.param_groups:
    g['lr'] = 1e-4
    g['weight_decay'] = 1e-5



In [17]:
start_epoch = 0
num_epochs = start_epoch + 100
bath_limit = 10


# Main training loop
try:
    for epoch in range(start_epoch,num_epochs):
        # Train the model
        model.train()
        epoch_loss = 0
        for batch_idx, (data, labels) in enumerate(train_loader):
            data = data.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(data)
            loss = loss_fn(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{batch_idx + 1}/{len(train_loader)}], Loss: {loss.item():,.6f}', end='\r', flush=True)
            if batch_idx >= bath_limit:
                break
        
            

        #add weights and biases to tensorboard
        weights = {}
        biases = {}
        grads = {}
        for name, param in model.named_parameters():
            if 'weight' in name:
                writer.add_histogram(f'weights/{name}', param, epoch)
            elif 'bias' in name:
                writer.add_histogram(f'biases/{name}', param, epoch)
            if param.grad is not None:
                writer.add_histogram(f'grads/{name}', param.grad, epoch)



        # Test the model
        
        model.eval()
        vepoch_loss = 0
        with torch.no_grad():
            for vbatch_idx, (vdata, vlabels) in enumerate(test_loader):
                vdata = vdata.to(device)
                vlabels = vlabels.to(device)
                voutputs = model(vdata)
                vloss = loss_fn(voutputs, vlabels)
                vepoch_loss += vloss.item()
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{vbatch_idx + 1}/{len(test_loader)}], Validation Loss: {vloss.item():,.6f}', end='\r', flush=True)
                if vbatch_idx >= bath_limit:
                    break

        # Save the model checkpoint if validation loss is less than best validation loss
        if vepoch_loss/len(test_loader) < best_vloss:
            best_vloss = vepoch_loss/len(test_loader)
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'vloss': best_vloss,
                }, './models/nq-llm_0_1.pth')
            lr = next(iter(optimizer.param_groups))['lr']
            weight_decay = next(iter(optimizer.param_groups))['weight_decay']
            print(f"Model saved at epoch {epoch+1} with validation loss {vepoch_loss/len(test_loader):.6f} Learning rate: {lr:.2e} Weight decay: {weight_decay:.2e} ")
        #else - restore the model from previous checkpoint and reduce learning rate 5 times and increase weight decay 50%
        # else:
        #     checkpoint = torch.load('./models/nq-lstm.pth')
        #     model.load_state_dict(checkpoint['model_state_dict'])
        #     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            
        #     for g in optimizer.param_groups:
        #         g['lr'] = g['lr'] * 0.3
        #         g['weight_decay'] = g['weight_decay'] * 1.1
        #     print(f"Model restored from epoch {epoch} with validation loss {best_vloss/len(test_loader)}")
        #     print(f"Learning rate reduced to {g['lr']} and weight decay increased to {g['weight_decay']}")

        

        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {epoch_loss / batch_idx:,.6f}, Validation Loss: {vepoch_loss / vbatch_idx:,.6f}')
        writer.add_scalars('Loss', {'Train': epoch_loss / batch_idx, 'Test': vepoch_loss / vbatch_idx}, epoch)
        
        
        # get actual lr and weight decay from optimizer and write them to tensorboard
        lr = next(iter(optimizer.param_groups))['lr']
        weight_decay = next(iter(optimizer.param_groups))['weight_decay']
        writer.add_scalar('Learning rate', lr, epoch)
        writer.add_scalar('Weight decay', weight_decay, epoch)
        
        # write_charts_to_TB('Test data 1m candles',writer, vlabels.cpu().numpy()[:31,-1,0:4].reshape(-1,4), voutputs.cpu().numpy()[:31,-1,0:4].reshape(-1,4), epoch)
        # write_charts_to_TB('Test data sample',writer, vlabels.cpu().numpy()[30,-31:,0:4].reshape(-1,4), voutputs.cpu().numpy()[30,-31:,0:4].reshape(-1,4), epoch)



except KeyboardInterrupt:
    print(f"Interrupted at epoch {epoch} with Training loss {epoch_loss:,.6f} and Validation loss {vepoch_loss:,.6f}")
finally:
    writer.close()
    



Model saved at epoch 1 with validation loss 0.005562 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [1/100], Train Loss: 4.021925, Validation Loss: 5.450962
Model saved at epoch 2 with validation loss 0.005556 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [2/100], Train Loss: 4.072854, Validation Loss: 5.445582
Model saved at epoch 3 with validation loss 0.005546 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [3/100], Train Loss: 4.029094, Validation Loss: 5.435732
Model saved at epoch 4 with validation loss 0.005545 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [4/100], Train Loss: 4.048240, Validation Loss: 5.434364
Model saved at epoch 5 with validation loss 0.005534 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [5/100], Train Loss: 4.081314, Validation Loss: 5.423396
Model saved at epoch 6 with validation loss 0.005528 Learning rate: 1.00e-04 Weight decay: 1.00e-05 
Epoch [6/100], Train Loss: 4.000529, Validation Loss: 5.417753
Epoch [7/100], T

In [12]:
#interrim model parameters and optimizer state saving

torch.save({
            'epoch': epoch,
            'batch_step': batch_idx,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': epoch_loss,
            }, './models/nq-llm_0_interrim.pth')

In [27]:
# function which perform candle prediction in the following way:
# 1. take last 30 tockenized candles from test dataset
# 2. predict next tocken
# 3. add predicted tocken to the end of tockenized candles and remove first tocken
# 4. repeat 2-3 steps until new candle tocken (0) is predicted.
# 5. return predicted tockenized candle along with preceding 30 tockenized candles



In [35]:
with torch.no_grad():
    for idx, (vdata, vlabels) in enumerate(test_loader):
        vdata = vdata.to(device)
        vlabels = vlabels.to(device)
        voutputs = model(vdata)    
        break 
F.softmax(voutputs,dim=1).argmax(dim=1), vlabels

(tensor([  9, 212,   9, 975,   0,   9, 209,   9, 975,   0,   9, 212, 587, 975,
           0,   9, 212, 587, 975,   0,   9, 212, 587, 975,   0,   1,   9, 212,
         587, 975,   0,   9, 209,   9, 975,   0,   9, 209,   9, 975,   0,   9,
         212,   9, 975,   0,   9, 209,   9, 975,   0,   1,   2,   9, 212,   9,
         975,   0,   9, 215,  13, 975,   0,   9], device='mps:0'),
 tensor([587, 225,   9, 974,   0, 589, 232,  17, 975,   0,  11, 229, 596, 976,
           0,  10, 227, 587, 976,   0,   9, 222, 597, 975,   0,   1,  10, 209,
         599, 974,   0, 588, 229,  14, 978,   0, 594, 209,  14, 975,   0, 590,
         210,  10, 974,   0, 588, 221,  16, 979,   0,   1,   2, 587, 238,  12,
         978,   0, 587, 220,  39, 976,   0, 591], device='mps:0'))

In [36]:

# save sequense tensor to hdf5
# with h5py.File('data/nq17-23_1min_seq.hdf5', 'w') as f:
#        dataset = f.create_dataset('data', shape=seq_tensor.shape, dtype='i8')
#        dataset[:] = seq_tensor[:]
#        #save index_to_value dictionaries
#        f.create_dataset('index_to_bottom_wick', data=np.array(list(index_to_bottom_wick.items())))
#        f.create_dataset('index_to_body', data=np.array(list(index_to_body.items())))
#        f.create_dataset('index_to_top_wick', data=np.array(list(index_to_top_wick.items())))
#        f.create_dataset('index_to_open_gap', data=np.array(list(index_to_open_gap.items())))

#load index to candlestick mapping from hdf5 file
with h5py.File("data/nq17-23_1min_seq.hdf5", "r") as f:
    index_to_bottom_wick = dict(f["index_to_bottom_wick"][:])
    index_to_body = dict(f["index_to_body"][:])
    index_to_top_wick = dict(f["index_to_top_wick"][:])
    index_to_open_gap = dict(f["index_to_open_gap"][:])
    
