# Reappraisal Training on PyTorch Lightning

## Setup
- When running on Google Colab, mount Google Drive to access scripts.
- `cd` into the project root and install dependencies.

In [None]:
# Load the autoreload extension; it is switched on later with `%autoreload 2`
# so that edits to the local project modules are picked up without a restart.
%load_ext autoreload
# --- Google Colab only -------------------------------------------------------
# Uncomment the lines below to mount Google Drive, change into the project
# root on Drive, and download the NLTK 'punkt' tokenizer data.
# from google.colab import drive
# drive.mount('/content/drive')
#%cd "/content/drive/MyDrive/ldh"
# import nltk
# nltk.download('punkt')

# --- Local run ---------------------------------------------------------------
# Change into the project root. NOTE(review): this is a machine-specific
# absolute path; anyone else running the notebook must edit it.
%cd "/Users/danielpham/Google Drive/ldh"
# One-time dependency install; uncomment on a fresh environment.
# %pip install transformers datasets pytorch-lightning nltk matplotlib "ray[tune]"
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the cell output), which hides where names come from.
# Consider `%matplotlib inline` plus explicit imports once it is confirmed
# that no later cell relies on the injected names.
%pylab inline


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/danielpham/Google Drive/ldh
Populating the interactive namespace from numpy and matplotlib


In [None]:
from datetime import datetime
from pathlib import Path

import torch
import pandas as pd 
import pytorch_lightning as lit 

from reappraisalmodel.ldhdata import LDHDataModule

# Experiment configuration shared by the cells below.
strat = 'far'      # passed to the data module as `strat`; later reused as the checkpoint prefix
batch_size = 16    # passed to the data module as `batch_size`
num_folds = 3      # number of cross-validation folds (also the K-fold loop bound)

# Build the data module once; the K-fold loop asks it for per-fold
# train/validation dataloaders.
ldhdata = LDHDataModule(
    kfolds=num_folds,
    strat=strat,
    batch_size=batch_size,
)


Loading cached processed dataset at /Users/danielpham/Google Drive/ldh/output/training/far/cache-22098b089e6d8812.arrow


Training data loaded from disk.
Encoding Train Data:
Evaluation data loaded from disk.
Encoding Test Data:


HBox(children=(FloatProgress(value=0.0, max=32109.0), HTML(value='')))




PosixPath('/Users/danielpham/Google Drive/ldh')

In [None]:
# Sanity check: display the current working directory, which should be the
# project root selected by the `%cd` in the setup cell.
Path.cwd()

PosixPath('/Users/danielpham/Google Drive/ldh')

In [None]:
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, Callback
#from pl_bolts.callbacks import PrintTableMetricsCallback
#from ray.tune.integration.pytorch_lightning import TuneReportCallback


# Everything produced by training is written underneath <cwd>/output.
save_dir = Path.cwd().joinpath('output')
logger = lit.loggers.TensorBoardLogger(save_dir=save_dir)

# Hyperparameter dictionary handed to the model constructor.
default_config = dict(lr=1e-3, hidden_layer_size=50)

# Early stopping: end training once `val_loss` has failed to improve by at
# least `min_delta` for `patience` consecutive validation checks.
callback_earlystopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=5,
    verbose=True,
)


EarlyStopping mode set to min for monitoring val_loss.


In [None]:
### K-FOLD CV
%autoreload 2 
from datetime import datetime
from reappraisalmodel.lightningreapp import LightningReapp


for i in range(num_folds):
    split = i # Current split being trained
    train_dl = ldhdata.get_train_dataloader(split)
    val_dl = ldhdata.get_val_dataloader(split)
    
    strat = ldhdata.strat
    dt = datetime.now() # get the datetime 
    
    callback_checkpoint = ModelCheckpoint(
        dirpath=save_dir,
        filename='{epoch}-{val_loss:.2f}',
        monitor='val_loss', verbose=False, 
        save_last=False, save_top_k=1, save_weights_only=False, 
        mode='min', period=1, prefix=strat)
    
    model = LightningReapp(default_config)
    trainer = lit.Trainer(
        fast_dev_run=2,
        logger=logger,
        max_epochs=50,
        gradient_clip_val=1.0,
        progress_bar_refresh_rate=30,
        terminate_on_nan=True,
        weights_summary="top",
        weights_save_path=save_dir,
        callbacks=[callback_checkpoint,callback_early_stopping])
    print(f"Training on split {i + 1}")
    trainer.fit(model, train_dl, val_dl)
    print(callback_checkpoint.best_model_path)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores
Running in fast_dev_run mode: will run a full train, val and test loop using 2 batch(es).

  | Name       | Type             | Params
------------------------------------------------
0 | model      | DistilBertModel  | 66.4 M
1 | avg        | AvgPool1d        | 0     
2 | classifier | Sequential       | 629 K 
3 | train_loss | MeanSquaredError | 0     
4 | val_loss   | MeanSquaredError | 0     
------------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params


Training on split 1




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

[{'loss': tensor(4.3218)}, {'loss': tensor(3.3146)}]




GPU available: False, used: False
TPU available: None, using: 0 TPU cores
Running in fast_dev_run mode: will run a full train, val and test loop using 2 batch(es).

  | Name       | Type             | Params
------------------------------------------------
0 | model      | DistilBertModel  | 66.4 M
1 | avg        | AvgPool1d        | 0     
2 | classifier | Sequential       | 629 K 
3 | train_loss | MeanSquaredError | 0     
4 | val_loss   | MeanSquaredError | 0     
------------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params


Training on split 2


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

[{'loss': tensor(18.8731)}, {'loss': tensor(18.0243)}]




GPU available: False, used: False
TPU available: None, using: 0 TPU cores
Running in fast_dev_run mode: will run a full train, val and test loop using 2 batch(es).

  | Name       | Type             | Params
------------------------------------------------
0 | model      | DistilBertModel  | 66.4 M
1 | avg        | AvgPool1d        | 0     
2 | classifier | Sequential       | 629 K 
3 | train_loss | MeanSquaredError | 0     
4 | val_loss   | MeanSquaredError | 0     
------------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params


Training on split 3


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

[{'loss': tensor(12.3200)}, {'loss': tensor(20.2766)}]




''