In [1]:
import os
import torch
import math

import pandas as pd

from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from utils import train_loop, val_loop
from model import MyDataset, CryptoTransformer, MSEPlusRank

In [21]:
# hyperparameters
NUM_FEATURES = 68

BATCH_SIZE = 64
MODEL_DIM = 64
FFN_DIM = 64
DROPOUT = 0.3
NUM_HEADS = 4
NUM_LAYERS = 2
MAX_EPOCH = 3
experiment_name = f"size_{MODEL_DIM}_{FFN_DIM}_nheads_{NUM_HEADS}_nlayers_{NUM_LAYERS}"

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [8]:
train_length = 24236806
num_splits = 10
split_length = math.floor(train_length/num_splits)

processed_val = pd.read_csv('./processed_data/processed_val.gz')
val_dataset = MyDataset(processed_val)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [22]:
net = CryptoTransformer(model_dim=MODEL_DIM, num_heads=NUM_HEADS, num_layers=NUM_LAYERS, dropout=DROPOUT, ffn_dim=FFN_DIM, num_features=NUM_FEATURES).to(device)
loss_fn = MSEPlusRank(lamda=1)
optimizer = torch.optim.AdamW(net.parameters())
total_num = sum(p.numel() for p in net.parameters())
trainable_num = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f"Total number of parameters: {total_num/1e3:.0f}K, number of trainable parameters: {trainable_num/1e3:.0f}K")

Total number of parameters: 80K, number of trainable parameters: 80K


In [23]:
if not os.path.exists(f'./checkpoint/{experiment_name}'):
    os.mkdir(f'./checkpoint/{experiment_name}')
if not os.path.exists(f'./logs/{experiment_name}'):
    os.mkdir(f'./logs/{experiment_name}')

In [24]:
min_val_loss = float('inf')
best_epoch = 1
best_step = 1
step = 0

tb = SummaryWriter(log_dir=f'./logs/{experiment_name}/')
for t in range(MAX_EPOCH):
    for i in range(num_splits):
        print(f"Epoch {t+1}, Train Split: {i+1}\n--------------------------")
        processed_train = pd.read_csv('./processed_data/processed_train.gz', skiprows=range(1,split_length*2), nrows=split_length)        
        train_dataset = MyDataset(processed_train)
        train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        train_loss = train_loop(train_dataloader, net, loss_fn, optimizer, device)

        val_loss = val_loop(val_dataloader, net, loss_fn, device)
        tb.add_scalar("Train Loss", train_loss, t*num_splits+i+1)
        tb.add_scalar("Val Loss", val_loss, t*num_splits+i+1)
    
        torch.save(net, f"./checkpoint/{experiment_name}/epoch_{t+1}_step_{i+1}.pt")
        if val_loss < min_val_loss:
            best_epoch = t+1
            best_step = i+1
            min_val_loss = val_loss
print(f"best epoch: {best_epoch}, best step: {best_step}, minimun validations loss: {min_val_loss:.2e}")

Epoch 1, Train Split: 1
--------------------------


100%|██████████| 3210/3210 [02:01<00:00, 26.39it/s, train loss=0.0148]
100%|██████████| 1266/1266 [00:34<00:00, 37.02it/s, val loss=0.0451]


Epoch 1, Train Split: 2
--------------------------


100%|██████████| 3210/3210 [01:56<00:00, 27.65it/s, train loss=7.82e-5] 
100%|██████████| 1266/1266 [00:31<00:00, 39.75it/s, val loss=0.0211]


Epoch 1, Train Split: 3
--------------------------


100%|██████████| 3210/3210 [01:48<00:00, 29.48it/s, train loss=3.96e-5]
100%|██████████| 1266/1266 [00:31<00:00, 40.07it/s, val loss=0.00955]


Epoch 1, Train Split: 4
--------------------------


100%|██████████| 3210/3210 [01:51<00:00, 28.79it/s, train loss=3.35e-5]
100%|██████████| 1266/1266 [00:33<00:00, 38.20it/s, val loss=0.0079] 


Epoch 1, Train Split: 5
--------------------------


100%|██████████| 3210/3210 [01:58<00:00, 27.01it/s, train loss=3.2e-5] 
100%|██████████| 1266/1266 [00:32<00:00, 38.41it/s, val loss=0.00239]


Epoch 1, Train Split: 6
--------------------------


100%|██████████| 3210/3210 [01:53<00:00, 28.37it/s, train loss=3.12e-5]
100%|██████████| 1266/1266 [00:36<00:00, 34.38it/s, val loss=0.000506]


Epoch 1, Train Split: 7
--------------------------


100%|██████████| 3210/3210 [01:57<00:00, 27.29it/s, train loss=3.08e-5]
100%|██████████| 1266/1266 [00:33<00:00, 38.34it/s, val loss=0.000119]


Epoch 1, Train Split: 8
--------------------------


100%|██████████| 3210/3210 [01:57<00:00, 27.33it/s, train loss=3.03e-5]
100%|██████████| 1266/1266 [00:32<00:00, 38.61it/s, val loss=3.35e-5]


Epoch 1, Train Split: 9
--------------------------


100%|██████████| 3210/3210 [01:49<00:00, 29.45it/s, train loss=3.01e-5]
100%|██████████| 1266/1266 [00:32<00:00, 39.34it/s, val loss=5.61e-5]


Epoch 1, Train Split: 10
--------------------------


100%|██████████| 3210/3210 [01:56<00:00, 27.56it/s, train loss=2.97e-5]
100%|██████████| 1266/1266 [00:33<00:00, 37.34it/s, val loss=2.19e-5]


Epoch 2, Train Split: 1
--------------------------


 66%|██████▌   | 2106/3210 [01:14<00:42, 26.21it/s, train loss=2.94e-5]