In [1]:
import argparse
import json
import pytorch_lightning as pl
from argparse import Namespace
from model import DTSModel
from datamodules.csvdatamodule import CsvDataModule
from datamodules.hivedatamodule import HiveDataModule
from datamodules.sqldatamodule import SqlDataModule
from datamodules.s3datamodule import S3DataModule
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from dataclasses import dataclass
import torch

In [2]:
@dataclass
class Config:
    data: dict
    model: dict
    train:dict
        
config = Config(
    data= {
        "data": "csv",
        "data_params": {
            "normalize": 1,
            "dataset": "rnn",
            "num_feature": 5,
            "seq_len": 20,
            "tgt_len": 1,
            "batch_size": 32,
            "train_path": "data/QQQ_train.csv",
            "val_path": "data/QQQ_val.csv",
            "test_path": "data/QQQ_test.csv"
        }
    },
    model= {
        "model_name": "custom_rnn",
        "model_params": {
            "input_size": 5,
            "hidden_size": 32,
            "output_size": 1,
            "num_layers": 2,
            "lr": 1e-4,
        },
        "loss_fn_type": "mse",
        "loss_params": {}
    },
    train=  {
        "accelerator": "gpu",
        "devices": 1,
        "strategy": 'ddp',
        "max_epochs": 100
    }
)

In [3]:
pl.seed_everything(42, workers=True)
logger = TensorBoardLogger('logs/', name=config.model['model_name'])

Global seed set to 42


In [4]:
# Create LightningDataModule
data_module = CsvDataModule(config.data['data_params'])


In [5]:
test = data_module.train_data[:,4].reshape(-1, 1)
test.shape

(2960, 1)

In [6]:
next(iter(data_module.train_dataloader()))

[tensor([[[-0.9595, -0.9624, -0.9599, -0.9681,  0.0411],
          [-0.9586, -0.9635, -0.9591, -0.9685, -0.1521],
          [-0.9570, -0.9614, -0.9567, -0.9661, -0.1821],
          ...,
          [-0.9647, -0.9667, -0.9668, -0.9697, -0.1458],
          [-0.9605, -0.9654, -0.9603, -0.9689, -0.2277],
          [-0.9551, -0.9574, -0.9551, -0.9614,  0.0941]],
 
         [[-0.2291, -0.2269, -0.2250, -0.2363, -0.7684],
          [-0.2233, -0.2275, -0.2247, -0.2437, -0.7591],
          [-0.2278, -0.2253, -0.2228, -0.2343, -0.7961],
          ...,
          [-0.2194, -0.2215, -0.2156, -0.2325, -0.8293],
          [-0.2140, -0.2148, -0.2114, -0.2250, -0.8046],
          [-0.2096, -0.2121, -0.2063, -0.2245, -0.8701]],
 
         [[-0.8864, -0.8925, -0.8886, -0.9054, -0.4053],
          [-0.8876, -0.8900, -0.8880, -0.9018, -0.3342],
          [-0.8852, -0.8898, -0.8837, -0.9015, -0.5153],
          ...,
          [-0.9100, -0.9144, -0.9127, -0.9229,  0.4694],
          [-0.9087, -0.9133, -0.9090,

In [7]:
# Create LightningModule
device = torch.device('cuda') if config.train['accelerator']=='gpu' else torch.device('cpu')
model = DTSModel(config.model, device)
model.to(device)

DTSModel(
  (model): CustomRNN(
    (rnn): LSTM(5, 32, num_layers=2, batch_first=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (fc): Linear(in_features=32, out_features=1, bias=True)
  )
  (loss_fn): MSE()
)

In [8]:
# Callback to save the model checkpoint
checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        dirpath='./checkpoints/',
        filename='model-{epoch:02d}-{val_loss:.2f}',
        save_top_k=3,
        mode='min',
        save_last=True,
        every_n_epochs=1,  # Save checkpoint every epoch
    )

# Create Trainer
trainer = pl.Trainer(
        accelerator=config.train['accelerator'],
        devices=config.train['devices'],
        #strategy=config.train['strategy'],
        max_epochs=config.train['max_epochs'],
        callbacks=[
            EarlyStopping(monitor='val_loss',patience=5,verbose=False, mode='min'),
            LearningRateMonitor(logging_interval='step'),
            checkpoint_callback
        ],
        logger=logger,
    )

# Train the model
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type      | Params
--------------------------------------
0 | model   | CustomRNN | 13.5 K
1 | loss_fn | MSE       | 0     
--------------------------------------
13.5 K    Trainable params
0         Non-trainable params
13.5 K    Total params
0.054     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [9]:
preds = trainer.test(datamodule=data_module)

  rank_zero_warn(
Restoring states from the checkpoint path at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=46-val_loss=0.08.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=46-val_loss=0.08.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss          0.018804363906383514
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [10]:
preds

[{'test_loss': 0.018804363906383514}]

In [11]:
import numpy as np
test_loader = data_module.test_dataloader()
predictions = []
model.to(device)
for batch_idx, (inputs, labels) in enumerate(test_loader):
    if batch_idx !=0: continue
    
    outputs = model(inputs.to(device))
    if config.data['data_params']['normalize']:
        outputs = data_module.target_denormalize(outputs.detach().cpu().numpy().squeeze())
        labels = data_module.target_denormalize(labels.detach().cpu().numpy().squeeze())
    else:
        outputs = outputs.detach().cpu().numpy().squeeze()
        labels = labels.detach().cpu().numpy().squeeze()

    predictions.append(list(zip(labels, outputs)))
#predictions = np.concatenate(predictions, axis=0)

In [12]:
predictions

[[(362.845947265625, 298.82178),
  (365.7942199707031, 292.48602),
  (350.9635009765625, 323.79898),
  (355.3909301757813, 314.37848),
  (352.531982421875, 323.00446),
  (356.4927978515625, 326.48416),
  (364.047119140625, 330.75568),
  (355.8078308105469, 316.2437),
  (344.5210266113281, 332.85468),
  (344.94781494140625, 332.10526),
  (353.524658203125, 325.63394),
  (353.43530273437506, 321.35025),
  (342.9227905273437, 304.0764),
  (339.0115966796875, 309.60095),
  (335.606689453125, 335.14697),
  (327.0100402832031, 335.30087),
  (337.9990234375, 318.08066),
  (343.24041748046875, 311.84775),
  (344.2629089355469, 310.6474),
  (338.99176025390625, 312.06876),
  (344.6798095703125, 310.41614),
  (339.756103515625, 332.06604),
  (334.8324279785156, 318.8645),
  (322.4833984375, 341.5007),
  (320.9844970703125, 325.36752),
  (332.53927612304693, 284.04443),
  (328.8465576171875, 310.667),
  (322.0267639160156, 327.15515),
  (315.8424072265625, 304.29797),
  (325.7493591308594, 294.08