In [1]:
import argparse
import json
from argparse import Namespace
from dataclasses import dataclass

import numpy as np
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

from datamodules.csvdatamodule import CsvDataModule
from datamodules.hivedatamodule import HiveDataModule
from datamodules.s3datamodule import S3DataModule
from datamodules.sqldatamodule import SqlDataModule
from model import DTSModel

In [2]:
@dataclass
class Config:
    """Bundle of the three configuration sections used by this notebook.

    Each field is a plain dict that is indexed by key in later cells.
    """
    # Data source selector plus the parameters handed to the data module.
    data: dict
    # Model name, model hyper-parameters and loss settings.
    model: dict
    # Options forwarded to the Lightning Trainer.
    train: dict


# Sizes that appear in both the data section and the model section.
# They are defined once so the two sections cannot drift apart.
# NOTE(review): presumably the data pipeline and the model must agree on
# these values -- confirm against CsvDataModule / DTSModel.
NUM_FEATURES = 5   # data "num_feature" / model "input_size"
ENC_SEQ_LEN = 30   # encoder sequence length
DEC_SEQ_LEN = 5    # decoder sequence length
TGT_SEQ_LEN = 5    # data "tgt_seq_len" / model "out_seq_len"

config = Config(
    data={
        "data": "csv",
        "data_params": {
            "dataset": "tst",
            "num_feature": NUM_FEATURES,
            "enc_seq_len": ENC_SEQ_LEN,
            "dec_seq_len": DEC_SEQ_LEN,
            "tgt_seq_len": TGT_SEQ_LEN,
            "batch_size": 2,
            "train_path": "data/QQQ_train.csv",
            "val_path": "data/QQQ_val.csv",
            "test_path": "data/QQQ_test.csv"
        }
    },
    model={
        "model_name": "tst",
        "model_params": {
            "input_size": NUM_FEATURES,
            "enc_seq_len": ENC_SEQ_LEN,
            "dec_seq_len": DEC_SEQ_LEN,
            "out_seq_len": TGT_SEQ_LEN,
            "dim_val": 512,
            "n_encoder_layers": 4,
            "n_decoder_layers": 4,
            "n_heads": 8,
            "dropout_encoder": 0.2,
            "dropout_decoder": 0.2,
            "dropout_pos_enc": 0.1,
            "dim_feedforward_encoder": 2048,
            "dim_feedforward_decoder": 2048,
            "num_predicted_features": 1,
            "lr": 1e-4
        },
        "loss_fn_type": "mse",
        "loss_params": {}
    },
    train={
        "accelerator": "gpu",
        "devices": 1,
        "strategy": 'ddp',
        "max_epochs": 10
    }
)

In [3]:
# Seed the global RNGs through Lightning; workers=True also requests seeding
# of dataloader workers (per the Lightning documentation).
pl.seed_everything(seed=42, workers=True)

# TensorBoard logs are grouped under logs/ by the configured model name.
experiment_name = config.model['model_name']
logger = TensorBoardLogger(save_dir='logs/', name=experiment_name)

Global seed set to 42


In [4]:
# Build the LightningDataModule from the "data_params" section of the config.
csv_params = config.data['data_params']
data_module = CsvDataModule(csv_params)


In [5]:
# Take column index 4 of data_module.train_data and reshape it into a
# single-column 2-D array, then show the resulting shape.
column_4 = data_module.train_data[:, 4]
test = column_4.reshape(-1, 1)
test.shape

(2960, 1)

In [6]:
# Peek at the first batch produced by the training dataloader.
train_loader = data_module.train_dataloader()
next(iter(train_loader))

[tensor([[[-1.3370, -1.3235, -1.3383, -1.2842,  2.8143],
          [-1.3149, -1.3014, -1.3150, -1.2704,  5.1295],
          [-1.3026, -1.2994, -1.3053, -1.2621,  2.7092],
          [-1.3042, -1.3005, -1.3083, -1.2633,  3.5140],
          [-1.3053, -1.3070, -1.3154, -1.2751,  3.1127],
          [-1.3083, -1.3033, -1.3108, -1.2677,  2.4459],
          [-1.2952, -1.2897, -1.2957, -1.2539,  2.6249],
          [-1.2906, -1.2887, -1.2940, -1.2546,  2.2336],
          [-1.2930, -1.2918, -1.2982, -1.2602,  2.3819],
          [-1.2998, -1.2989, -1.3050, -1.2614,  2.4049],
          [-1.2904, -1.2830, -1.2944, -1.2497,  3.8490],
          [-1.2853, -1.2819, -1.2876, -1.2487,  2.5615],
          [-1.2926, -1.2927, -1.3016, -1.2637,  2.3511],
          [-1.3021, -1.2940, -1.3013, -1.2574,  3.0492],
          [-1.2960, -1.2814, -1.2963, -1.2560,  3.6582],
          [-1.2880, -1.2876, -1.2947, -1.2515,  4.1761],
          [-1.2826, -1.2751, -1.2853, -1.2434,  2.9146],
          [-1.2813, -1.2730, -1

In [7]:
# Create LightningModule
# Choose the torch device that corresponds to the configured accelerator:
# 'gpu' maps to CUDA, anything else falls back to the CPU.
if config.train['accelerator'] == 'gpu':
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = DTSModel(config.model, device)
# Last expression of the cell: the notebook displays the module returned by .to().
model.to(device)

DTSModel(
  (model): TSTModel(
    (encoder_input_layer): Linear(in_features=5, out_features=512, bias=True)
    (decoder_input_layer): Linear(in_features=5, out_features=512, bias=True)
    (linear_mapping): Linear(in_features=512, out_features=5, bias=True)
    (positional_encoding_layer): PositionalEncoder(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
          )
          (linear1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
          (linear2): Linear(in_features=2048, out_features=512, bias=True)
          (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(

In [8]:
# Checkpointing: keep the three checkpoints with the lowest 'val_loss' plus the
# most recent one, checking once per epoch.
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/',
    filename='model-{epoch:02d}-{val_loss:.2f}',
    monitor='val_loss',
    mode='min',
    save_top_k=3,
    save_last=True,
    every_n_epochs=1,
)

# Log the learning rate at every optimizer step.
lr_monitor = LearningRateMonitor(logging_interval='step')

# Build the Trainer from the "train" section of the config.
# NOTE(review): config.train['strategy'] is not passed to the Trainer and
# EarlyStopping(monitor='val_loss') is not registered -- both are left
# disabled here; confirm whether that is still intended.
train_options = config.train
trainer = pl.Trainer(
    accelerator=train_options['accelerator'],
    devices=train_options['devices'],
    max_epochs=train_options['max_epochs'],
    callbacks=[lr_monitor, checkpoint_callback],
    logger=logger,
)

# Run training (and validation) using the loaders supplied by the data module.
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs/tst
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type     | Params
-------------------------------------
0 | model   | TSTModel | 29.4 M
1 | loss_fn | MSE      | 0     
-------------------------------------
29.4 M    Trainable params
0         Non-trainable params
29.4 M    Total params
117.737   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

ValueError: too many values to unpack (expected 2)

In [None]:
# Run the Trainer's test loop on the data module. No model is passed here, so
# which weights get evaluated is decided by the Trainer (see Trainer.test docs).
# NOTE(review): Trainer.test() is documented to return the logged test metrics,
# not raw predictions, so the name `preds` may be misleading -- confirm intent.
preds = trainer.test(datamodule=data_module)

In [None]:
# Display the value returned by trainer.test().
preds

In [None]:
# Collect (label, prediction) pairs for the first test batch only.
test_loader = data_module.test_dataloader()
predictions = []
model.to(device)
# Switch to inference mode: the model contains Dropout layers, which would
# otherwise stay active and make the predictions stochastic.
model.eval()

# NOTE(review): this loop assumes every test batch unpacks into exactly two
# items (inputs, labels). The training run failed with "too many values to
# unpack (expected 2)", so verify the batch structure before relying on this.
with torch.no_grad():  # inference only: skip building the autograd graph
    for inputs, labels in test_loader:
        outputs = model(inputs.to(device))
        # Move results to host memory and map them back to the original scale.
        outputs = data_module.target_denormalize(outputs.detach().cpu().numpy().squeeze())
        labels = data_module.target_denormalize(labels.detach().cpu().numpy().squeeze())

        predictions.append(list(zip(labels, outputs)))
        # Only the first batch is inspected; stop instead of draining the loader.
        break

In [None]:
# Display the collected (label, prediction) pairs.
predictions