## Example of training/testing deterministic models with PyTorch Lightning

In [1]:
from pathlib import Path
from types import SimpleNamespace
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
#from torch.utils.tensorboard import SummaryWriter # Uncomment to launch on vs

import os
import sys
# Add the parent of the current working directory to the import search path,
# so the project's packages can be imported without installing the project.
# The membership check keeps re-runs of this cell from appending duplicates.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np

from bayesrul.ncmapss.dataset import NCMAPSSDataModule
from bayesrul.ncmapss.frequentist import NCMAPSSModel, get_checkpoint, TBLogger

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Notebook-wide settings, grouped in one namespace so later cells can read
# them as attributes (args.data_path, args.net, ...).
# NOTE(review): `lr` is defined here but is not passed to the model in the
# cells visible in this notebook -- confirm whether it should be.
args = SimpleNamespace(
    data_path="../data/ncmapss/",
    out_path="../results/ncmapss/",
    scn="dnn_ptl",
    net="linear",
    lr=1e-4,
)

# Checkpoints live under <out_path>/<scn>/checkpoints/<net>.
checkpoint_dir = Path(args.out_path) / args.scn / "checkpoints" / args.net

# Logs go to the sibling "lightning_logs" directory for the same scenario/net.
tensorboard_dir = f"{args.out_path}/{args.scn}/lightning_logs/{args.net}"
logger = TBLogger(tensorboard_dir, default_hp_metric=False)

In [3]:
# Just to illustrate. To properly train see scripts/cmapss_training.py
data = NCMAPSSDataModule(args.data_path, batch_size=10000)
dnn = NCMAPSSModel(data.win_length, data.n_features, args.net)
# Checkpoint handed to trainer.fit below as `ckpt_path`.
# NOTE(review): presumably None when no checkpoint exists yet (fresh start)
# -- confirm against get_checkpoint.
checkpoint_file = get_checkpoint(checkpoint_dir)

# Resolve the label the model uses to prefix its logged metrics.
# The cells of this notebook disagreed on the attribute name (`loss` here,
# `loss_name` in the test cell), and a run of this cell failed with an
# AttributeError about `loss_name`. Prefer `loss_name` and fall back to `loss`
# so the monitored key is built the same way in both cells.
# NOTE(review): confirm the attribute name against NCMAPSSModel.
loss_label = getattr(dnn, "loss_name", None)
if loss_label is None:
    loss_label = dnn.loss

# Validation metric watched by both callbacks.
monitor = f"{loss_label}_loss/val"
checkpoint_callback = ModelCheckpoint(dirpath=checkpoint_dir, monitor=monitor)
# Stop once the monitored metric has not improved for 10 validation checks.
earlystopping_callback = EarlyStopping(monitor=monitor, patience=10)

trainer = pl.Trainer(
    gpus=[0],  # train on the first GPU only
    max_epochs=1000,  # upper bound; early stopping may end training sooner
    log_every_n_steps=2,
    logger=logger,
    callbacks=[
        checkpoint_callback,
        earlystopping_callback,
    ],
)
trainer.fit(dnn, data, ckpt_path=checkpoint_file)

AttributeError: 'NCMAPSSModel' object has no attribute 'loss_name'

In [None]:
# Evaluate the checkpointed model on the test set.
data = NCMAPSSDataModule(args.data_path, batch_size=1000)
dnn = NCMAPSSModel.load_from_checkpoint(get_checkpoint(checkpoint_dir))
trainer = pl.Trainer(gpus=[0], log_every_n_steps=10, logger=logger, 
                    max_epochs=-1) # Silence warning
test_loss = trainer.test(dnn, data, verbose=False)

# Resolve the label the model uses to prefix its logged metrics.
# The cells of this notebook disagreed on the attribute name (`loss` in the
# training cell, `loss_name` here); prefer `loss_name` and fall back to `loss`.
# NOTE(review): confirm the attribute name against NCMAPSSModel.
loss_label = getattr(dnn, "loss_name", None)
if loss_label is None:
    loss_label = dnn.loss

# Look the test metric up once and reuse it for both printed figures.
test_metric_key = f"{loss_label}_loss/test"
test_mse = test_loss[0][test_metric_key]
print(f"Test MSE : {test_mse}")
print(f"Test RMSE : {np.sqrt(test_mse)}")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 62.60it/s]
Test MSE : 465.4652099609375
Test RMSE : 21.57464275395858
