In [1]:
import argparse
import json
import pytorch_lightning as pl
from argparse import Namespace
from model import DTSModel
from datamodules.csvdatamodule import CsvDataModule
from datamodules.hivedatamodule import HiveDataModule
from datamodules.sqldatamodule import SqlDataModule
from datamodules.s3datamodule import S3DataModule
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from dataclasses import dataclass
import torch

In [2]:
@dataclass
class Config:
    """Top-level experiment configuration, split into three plain-dict sections."""

    # Data source selector plus the parameters handed to the data module.
    data: dict
    # Model name, model hyper-parameters and loss selection.
    model: dict
    # Keyword values forwarded to the Lightning Trainer (accelerator, devices, ...).
    train: dict
        
# Single experiment configuration used by every cell below.
config = Config(
    # --- data section: which source to read and how to window/batch it ---
    data=dict(
        data="csv",
        data_params=dict(
            num_feature=5,
            seq_len=20,
            tgt_len=1,
            batch_size=64,
            train_path="data/AAPL_train.csv",
            val_path="data/AAPL_val.csv",
            test_path="data/AAPL_test.csv",
        ),
    ),
    # --- model section: architecture, optimiser learning rate and loss ---
    model=dict(
        model_name="custom_rnn",
        model_params=dict(
            input_size=5,   # same value as data_params["num_feature"]
            hidden_size=32,
            output_size=1,  # same value as data_params["tgt_len"]
            num_layers=2,
            lr=2e-5,
        ),
        loss_fn_type="mse",
        loss_params={},
    ),
    # --- train section: values read when building the Trainer ---
    train=dict(
        accelerator="gpu",
        devices=1,
        strategy="ddp",
        max_epochs=100,
    ),
)

In [3]:
# Seed Lightning's global RNG handling (workers included) and create the
# TensorBoard logger, named after the configured model.
pl.seed_everything(seed=42, workers=True)
logger = TensorBoardLogger(save_dir='logs/', name=config.model['model_name'])

Global seed set to 42


In [4]:
# Build the CSV-backed LightningDataModule from the `data_params` section of the config.
data_module = CsvDataModule(
    config.data['data_params'],
)


In [5]:
# Debug helper (disabled): uncomment to pull one batch from the training dataloader.
#next(iter(data_module.train_dataloader()))

In [6]:
# Instantiate the LightningModule on the device implied by the configured
# accelerator: 'gpu' maps to CUDA, anything else maps to CPU.
device = torch.device('cuda' if config.train['accelerator'] == 'gpu' else 'cpu')
model = DTSModel(config.model, device)
# Kept as the last expression so the cell displays the module summary.
model.to(device)

DTSModel(
  (model): CustomRNN(
    (rnn): LSTM(5, 32, num_layers=2, batch_first=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (fc): Linear(in_features=32, out_features=1, bias=True)
  )
  (loss_fn): MSE()
)

In [7]:
# Checkpointing: track `val_loss` (lower is better), keep the three best
# checkpoints plus the most recent one, and evaluate for saving every epoch.
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/',
    filename='model-{epoch:02d}-{val_loss:.2f}',
    monitor='val_loss',
    mode='min',
    save_top_k=3,
    save_last=True,
    every_n_epochs=1,
)

# Trainer setup.
# NOTE: config.train['strategy'] is not forwarded to the Trainer, and
# EarlyStopping is imported but not registered as a callback here.
trainer = pl.Trainer(
    accelerator=config.train['accelerator'],
    devices=config.train['devices'],
    max_epochs=config.train['max_epochs'],
    logger=logger,
    callbacks=[
        LearningRateMonitor(logging_interval='step'),
        checkpoint_callback,
    ],
)

# Run training and validation using the loaders provided by the data module.
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type      | Params
--------------------------------------
0 | model   | CustomRNN | 13.5 K
1 | loss_fn | MSE       | 0     
--------------------------------------
13.5 K    Trainable params
0         Non-trainable params
13.5 K    Total params
0.054     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


In [8]:
# Evaluate on the test split. No model is passed, so Lightning reloads weights
# from a checkpoint of the preceding `fit`; request the best checkpoint
# explicitly instead of relying on the implicit default.
# NOTE: despite its name, `preds` holds the returned test metrics (a list of
# dicts), not model predictions.
preds = trainer.test(datamodule=data_module, ckpt_path='best')

  rank_zero_warn(
Restoring states from the checkpoint path at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=99-val_loss=1.35.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /hd1/dl/deep_time_series_framework/checkpoints/model-epoch=99-val_loss=1.35.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            2.444538116455078
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [9]:
# Display the value returned by `trainer.test`: a list of dicts keyed by metric name.
preds

[{'test_loss': 2.444538116455078}]

In [10]:
import numpy as np

# Manual inference on the first test batch only: collect
# (denormalized label, denormalized prediction) pairs for inspection.
test_loader = data_module.test_dataloader()
predictions = []
model.to(device)

# The network contains a Dropout layer, so it must be switched to eval mode for
# deterministic predictions; remember the previous mode so it can be restored.
was_training = model.training
model.eval()
try:
    # No gradients are needed for inference; this avoids building autograd graphs.
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            outputs = data_module.target_denormalize(outputs.detach().cpu().numpy().squeeze())
            labels = data_module.target_denormalize(labels.detach().cpu().numpy().squeeze())

            predictions.append(list(zip(labels, outputs)))
            # Only the first batch is inspected; stop instead of iterating over
            # (and discarding) the rest of the loader.
            break
finally:
    model.train(was_training)
#predictions = np.concatenate(predictions, axis=0)

In [11]:
# Display the collected (denormalized label, denormalized model output) pairs.
predictions

[[(174.55882263183594, 110.064156),
  (171.64024353027344, 113.452805),
  (171.3520050048828, 116.53582),
  (170.62640380859375, 114.73109),
  (173.77732849121094, 106.39766),
  (175.21856689453125, 127.441505),
  (171.08363342285156, 128.48775),
  (167.6245880126953, 121.94891),
  (167.86314392089844, 110.03556),
  (171.74957275390625, 120.02885),
  (171.5110321044922, 115.632675),
  (167.86314392089844, 118.15627),
  (166.2926483154297, 131.22746),
  (163.3306121826172, 120.964424),
  (159.10618591308594, 126.921936),
  (161.7601318359375, 123.06795),
  (163.85740661621094, 112.166115),
  (164.12576293945312, 118.47991),
  (162.2173309326172, 121.95179),
  (165.55709838867188, 115.21386),
  (165.2290802001953, 127.976814),
  (162.18751525878906, 125.46417),
  (158.3408203125, 103.3177),
  (156.49200439453125, 119.9159),
  (161.96884155273438, 100.61244),
  (157.5655059814453, 115.668045),
  (153.79832458496094, 111.73263),
  (149.71305847167972, 111.392624),
  (154.1561737060547, 127