In [1]:
import argparse
import json
import pytorch_lightning as pl
from argparse import Namespace
from model import DTSModel
from datamodules.csvdatamodule import CsvDataModule
from datamodules.hivedatamodule import HiveDataModule
from datamodules.sqldatamodule import SqlDataModule
from datamodules.s3datamodule import S3DataModule
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from dataclasses import dataclass
import torch

In [2]:
@dataclass
class Config:
    data: dict
    model: dict
    train:dict
        
config = Config(
    data= {
        "data": "csv",
        "data_params": {
            "dataset": "rnn",
            "num_feature": 5,
            "seq_len": 20,
            "tgt_len": 2,
            "batch_size": 32,
            "train_path": "data/QQQ_train.csv",
            "val_path": "data/QQQ_val.csv",
            "test_path": "data/QQQ_test.csv"
        }
    },
    model= {
        "model_name": "custom_rnn",
        "model_params": {
            "input_size": 5,
            "hidden_size": 32,
            "output_size": 2,
            "num_layers": 2,
            "lr": 1e-4,
        },
        "loss_fn_type": "mse",
        "loss_params": {}
    },
    train=  {
        "accelerator": "gpu",
        "devices": 1,
        "strategy": 'ddp',
        "max_epochs": 50
    }
)

In [3]:
pl.seed_everything(42, workers=True)
logger = TensorBoardLogger('logs/', name=config.model['model_name'])

Global seed set to 42


In [4]:
# Create LightningDataModule
data_module = CsvDataModule(config.data['data_params'])


In [5]:
test = data_module.train_data[:,4].reshape(-1, 1)
test.shape

(2960, 1)

In [6]:
next(iter(data_module.train_dataloader()))

[tensor([[[-1.3370, -1.3235, -1.3383, -1.2842,  2.8143],
          [-1.3149, -1.3014, -1.3150, -1.2704,  5.1295],
          [-1.3026, -1.2994, -1.3053, -1.2621,  2.7092],
          ...,
          [-1.2813, -1.2730, -1.2808, -1.2429,  2.2785],
          [-1.2797, -1.2754, -1.2807, -1.2464,  1.9115],
          [-1.2760, -1.2676, -1.2752, -1.2399,  2.9222]],
 
         [[-1.3149, -1.3014, -1.3150, -1.2704,  5.1295],
          [-1.3026, -1.2994, -1.3053, -1.2621,  2.7092],
          [-1.3042, -1.3005, -1.3083, -1.2633,  3.5140],
          ...,
          [-1.2797, -1.2754, -1.2807, -1.2464,  1.9115],
          [-1.2760, -1.2676, -1.2752, -1.2399,  2.9222],
          [-1.2658, -1.2597, -1.2697, -1.2352,  3.0065]],
 
         [[-1.3026, -1.2994, -1.3053, -1.2621,  2.7092],
          [-1.3042, -1.3005, -1.3083, -1.2633,  3.5140],
          [-1.3053, -1.3070, -1.3154, -1.2751,  3.1127],
          ...,
          [-1.2760, -1.2676, -1.2752, -1.2399,  2.9222],
          [-1.2658, -1.2597, -1.2697,

In [7]:
# Create LightningModule
device = torch.device('cuda') if config.train['accelerator']=='gpu' else torch.device('cpu')
model = DTSModel(config.model, device)
model.to(device)

DTSModel(
  (model): CustomRNN(
    (rnn): LSTM(5, 32, num_layers=2, batch_first=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (fc): Linear(in_features=32, out_features=2, bias=True)
  )
  (loss_fn): MSE()
)

In [8]:
# Callback to save the model checkpoint
checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        dirpath='./checkpoints/',
        filename='model-{epoch:02d}-{val_loss:.2f}',
        save_top_k=3,
        mode='min',
        save_last=True,
        every_n_epochs=1,  # Save checkpoint every epoch
    )

# Create Trainer
trainer = pl.Trainer(
        accelerator=config.train['accelerator'],
        devices=config.train['devices'],
        #strategy=config.train['strategy'],
        max_epochs=config.train['max_epochs'],
        callbacks=[
            #EarlyStopping(monitor='val_loss'),
            LearningRateMonitor(logging_interval='step'),
            checkpoint_callback
        ],
        logger=logger,
    )

# Train the model
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type      | Params
--------------------------------------
0 | model   | CustomRNN | 13.5 K
1 | loss_fn | MSE       | 0     
--------------------------------------
13.5 K    Trainable params
0         Non-trainable params
13.5 K    Total params
0.054     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


In [9]:
preds = trainer.test(datamodule=data_module)

  rank_zero_warn(
Restoring states from the checkpoint path at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=48-val_loss=1.03.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=48-val_loss=1.03.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           0.2675507962703705
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [10]:
preds

[{'test_loss': 0.2675507962703705}]

In [11]:
import numpy as np
test_loader = data_module.test_dataloader()
predictions = []
model.to(device)
for batch_idx, (inputs, labels) in enumerate(test_loader):
    if batch_idx !=0: continue
    
    outputs = model(inputs.to(device))
    outputs = data_module.target_denormalize(outputs.detach().cpu().numpy().squeeze())
    labels = data_module.target_denormalize(labels.detach().cpu().numpy().squeeze())

    predictions.append(list(zip(labels, outputs)))
#predictions = np.concatenate(predictions, axis=0)

In [12]:
predictions

[[(array([362.84594727, 365.79421997]),
   array([263.84976, 261.97858], dtype=float32)),
  (array([365.79421997, 350.96350098]),
   array([293.55426, 314.81058], dtype=float32)),
  (array([350.96350098, 355.39089966]),
   array([304.72   , 300.70328], dtype=float32)),
  (array([355.39089966, 352.5319519 ]),
   array([291.75934, 304.30292], dtype=float32)),
  (array([352.5319519 , 356.49276733]),
   array([299.433 , 288.2583], dtype=float32)),
  (array([356.49276733, 364.04714966]),
   array([251.008 , 291.6853], dtype=float32)),
  (array([364.04714966, 355.80780029]),
   array([265.15878, 301.03018], dtype=float32)),
  (array([355.80780029, 344.52102661]),
   array([229.24025, 235.74048], dtype=float32)),
  (array([344.52102661, 344.94781494]),
   array([302.04037, 292.16858], dtype=float32)),
  (array([344.94781494, 353.52462769]),
   array([305.80927, 296.30484], dtype=float32)),
  (array([353.52462769, 353.43533325]),
   array([259.57758, 253.1899 ], dtype=float32)),
  (array([353.