In [None]:
import os
import torch
import math

import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from utils import train_loop, val_loop
from model import MyDataset, CryptoTransformer

In [None]:
# Hyperparameters and run configuration.
BATCH_SIZE = 64      # mini-batch size handed to the DataLoaders
NUM_FEATURES = 68    # forwarded to CryptoTransformer as num_features
MODEL_DIM = 64       # forwarded to CryptoTransformer as model_dim
FFN_DIM = 128        # forwarded to CryptoTransformer as ffn_dim
DROPOUT = 0.3        # forwarded to CryptoTransformer as dropout
NUM_HEADS = 2        # forwarded to CryptoTransformer as num_heads
NUM_LAYERS = 2       # forwarded to CryptoTransformer as num_layers
MAX_EPOCH = 10       # number of passes of the outer training loop
experiment_name = 'model_v1.0'  # names the checkpoint/log sub-directories

# Pick the compute device, preferring CUDA, then Apple MPS, then CPU.
# The conditional expression short-circuits, so MPS is only probed when
# CUDA is unavailable.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [None]:
# Training data is consumed in num_splits pieces of split_length rows each;
# integer division drops any remainder rows.
train_length = 24236806
num_splits = 10
split_length = train_length // num_splits

# The validation set is loaded once and reused after every training split.
processed_val = pd.read_csv('./processed_data/processed_val.gz')
val_dataset = MyDataset(processed_val)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Build the model on the selected device, together with its loss and optimizer.
net = CryptoTransformer(
    model_dim=MODEL_DIM,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    ffn_dim=FFN_DIM,
    num_features=NUM_FEATURES,
)
net = net.to(device)

loss_fn = nn.MSELoss(reduction='mean')
optimizer = torch.optim.AdamW(net.parameters())  # library-default optimizer settings

# Report the model size: all parameters vs. those that receive gradients.
total_num = sum(weight.numel() for weight in net.parameters())
trainable_num = sum(
    weight.numel()
    for weight in net.parameters()
    if weight.requires_grad
)
print(f"Total number of parameters: {total_num/1e3:.0f}K, number of trainable parameters: {trainable_num/1e3:.0f}K")

In [None]:
# Create the per-experiment output directories.
# os.makedirs also creates the missing parents ('./checkpoint', './logs'),
# which os.mkdir would fail on with FileNotFoundError on a fresh checkout;
# exist_ok=True makes the cell safe to re-run.
os.makedirs(f'./checkpoint/{experiment_name}', exist_ok=True)
os.makedirs(f'./logs/{experiment_name}', exist_ok=True)

In [None]:
# Best-validation bookkeeping. best_step uses the same 0-based split index
# as the checkpoint file names, so the two can be matched directly.
min_val_loss = float('inf')
best_epoch = 1
best_step = 1
step = 0  # global TensorBoard step: advances once per (epoch, split) pair

# Log into this experiment's own directory (the name must be interpolated,
# otherwise every run writes to a literal './logs/experiment_name/' folder).
tb = SummaryWriter(log_dir=f'./logs/{experiment_name}/')
try:
    for t in range(1, MAX_EPOCH+1):
        # Stream the training file as consecutive chunks of split_length rows so
        # that each split is a *different* slice of the data and the archive is
        # only decompressed once per epoch.
        with pd.read_csv('./processed_data/processed_train.gz', chunksize=split_length) as train_chunks:
            # zip() against range(num_splits) caps the number of splits per
            # epoch, ignoring any short remainder chunk at the end of the file.
            for i, train_chunk in zip(range(num_splits), train_chunks):
                print(f"Epoch {t}, Train Split: {i+1}\n--------------------------")
                # Chunks keep their file-wide row index; reset it so the frame
                # is 0-based, as a standalone read of the slice would be.
                processed_train = train_chunk.reset_index(drop=True)
                train_dataset = MyDataset(processed_train)
                train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
                train_loss = train_loop(train_dataloader, net, loss_fn, optimizer, device)

                val_loss = val_loop(val_dataloader, net, loss_fn, device)
                tb.add_scalar("Train Loss", train_loss, step)
                tb.add_scalar("Val Loss", val_loss, step)
                step += 1  # without this every point lands on step 0

                # A checkpoint of the full model object is written after every split.
                torch.save(net, f"./checkpoint/{experiment_name}/epoch_{t}_step_{i}.pt")
                if val_loss < min_val_loss:
                    best_epoch = t
                    best_step = i
                    min_val_loss = val_loss
finally:
    # Flush and release the event file even if training is interrupted.
    tb.close()
print(f"best epoch: {best_epoch}, best step: {best_step}, minimun validations loss: {min_val_loss:.2e}")