# Reappraisal Training on PyTorch Lightning

## Setup
- Required Python Version: 3.7+
- `cd` into the project root and install dependencies:
  - `pip install -r requirements.txt`

### Colab
- When the repository is stored on Google Drive, it can be accessed using Google Colaboratory. The cell below mounts the drive, installs the necessary packages, and changes the root directory to the project directory.
    - Python Version: `3.7.10`
    - Be sure to set `ROOT_DIR` in the cell below to the path of your own project directory.

In [None]:
# When Running on Colab
# from google.colab import drive
# drive.mount('/content/drive')

# %pip install pytorch-lightning "ray[tune]" wandb transformers datasets nltk nbdev
# ! nbdev_install_git_hooks

import nltk
nltk.download('punkt')

# ROOT_DIR = '/content/drive/MyDrive/ldh'
ROOT_DIR = ".";
%cd {ROOT_DIR}

/Users/danielpham/Google Drive/ldh


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/danielpham/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Loading and Encoding Data

In [None]:
%load_ext autoreload
import os
import torch

# Define project root directory.
ROOT_DIR = os.path.abspath(".")
STRAT = 'obj'
BATCH_SIZE = 64
NUM_FOLDS=5
DEV_FLAG = 1 # Flag for fast runs when debugging.

# Load the DataModule and its corresponding 
from reappraisalmodel.ldhdata import LDHDataModule
ldhdata = LDHDataModule(data_dir=ROOT_DIR, strat=STRAT)
ldhdata.load_train_data()
ldhdata.load_eval_data()

# Load Model
- Can be done through checkpoint. 

## Run K-Fold Training
- See `./nbs/Trainers.ipynb`


In [None]:
%autoreload 2
from reappraisalmodel.trainers import kfold_train
# Learns a model NUM_FOLDS times and records the distribution of metrics across the CV.
results = kfold_train(
    NUM_FOLDS, 
    ldhdata, 
    strat=STRAT, 
)

In [None]:
import pandas as pd

# Tabulate the per-fold results. The two metric columns hold objects exposing
# `.item()` (e.g. scalar tensors), so unwrap them into plain Python numbers
# before summarising.
df = pd.DataFrame(results)
for metric_name in ('r2score', 'explained_var'):
    df[metric_name] = [value.item() for value in df[metric_name]]
df.describe()

## Testing on LDH II Data

# Training Process

## Test Models (Manual)

In [None]:
%autoreload
from reappraisalmodel.lightningreapp import LightningReapp
objmodel = LightningReapp.load_from_checkpoint("s3://ldhdata/backup/far-0224-epoch=2-step=2021.ckpt")

In [None]:
import torch
from tqdm import tqdm

# NOTE(review): `objdl` is not defined anywhere in the visible notebook. It must
# be an iterable of batches (presumably a DataLoader) whose items are dicts with
# 'input_ids' and 'attention_mask' tensors -- define it before running this cell.

# Run on the GPU when one is present, otherwise fall back to the CPU instead of
# failing on a hard-coded `.cuda()` call.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
objmodel.to(device)
objmodel.eval()  # Evaluation mode for inference.

objouts = []
# Inference only: without no_grad every stored output would keep its autograd
# graph alive and memory use would grow with each batch.
with torch.no_grad():
    for batch in tqdm(objdl):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        out = objmodel(input_ids, attention_mask)
        # Collapse dim 1 of the model output into one value per example and move
        # the result off the accelerator before storing it.
        objouts.append(out.sum(dim=1).cpu())
print(objouts)

## Single Training Session

In [None]:
%autoreload
import pickle

from datetime import datetime
from tqdm import tqdm

import pandas as pd
import torch
import pytorch_lightning as lit
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from reappraisalmodel.lightningreapp import LightningReapp

model = LightningReapp()

# Model saves the 3 checkpoints with the lowest validation loss throughout training
modelcheckpoint = ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    save_top_k=3,
    verbose=True
)
# Model tracks the loss_distance; shows when training and validation loss begin to diverge 
modelcheckpoint_loss_dist = ModelCheckpoint(
    monitor='loss_distance',
    mode='min',
    save_top_k=3,
    verbose=True
)

# Split train and validation data.
split_data = ldhdata.train_data.train_test_split(test_size=0.2)
train_data = split_data['train'].with_format(type='torch', columns=['score', 'input_ids', 'attention_mask'])
val_data = split_data['test'].with_format(type='torch', columns=['score', 'input_ids', 'attention_mask'])
eval_data = ldhdata.eval_data.with_format(type='torch', columns=['input_ids', 'attention_mask'])

# Create dataloaders
train_dl = DataLoader(train_data, batch_size=BATCH_SIZE)
val_dl = DataLoader(val_data, batch_size=BATCH_SIZE )
eval_dl = DataLoader(eval_data, batch_size=BATCH_SIZE)

# Mark the start time of the training session. 
today = datetime.today().strftime('%Y%m%d_%H%M%S')
session_version = "_".join([STRAT,today])
tb_logger = TensorBoardLogger("lightning_logs", name="reapp_model", version=session_version)
trainer = lit.Trainer(
    logger = tb_logger,
    precision=16 if torch.cuda.is_available() else 32, # We use 16-bit precision to reduce computational complexity
    val_check_interval=0.25, # Check validation loss 4 times an epoch
    callbacks=[modelcheckpoint, modelcheckpoint_loss_dist], # Register callbacks with trainer.
    gpus=1 if torch.cuda.is_available() else None,
)


# results = trainer.fit(model, train_dl, val_dl)


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.
Running in fast_dev_run mode: will run a full train, val and test loop using 2 batch(es).


## Predictions on Study 2 Data

In [None]:
# Predict a score for every Study 2 (evaluation) response.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Evaluation mode: without it, layers such as dropout stay active and the
# predictions change from run to run.
model.eval()

outs = []
with torch.no_grad():  # Inference only -- no autograd graph is needed.
    for idx, batch in enumerate(tqdm(eval_dl)):
        # In debug mode only the first two batches are scored.
        if DEV_FLAG and idx >= 2:
            break
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        out = model(input_ids, attention_mask)
        # One value per example: collapse dim 1 of the model output.
        outs.append(out.sum(dim=1).cpu().tolist())

# Flatten the per-batch lists into a single list of predictions.
newouts = [prediction for batch_predictions in outs for prediction in batch_predictions]

# `eval_dl` iterates `ldhdata.eval_data` in order, so the first len(newouts)
# rows line up with the predictions.
df = pd.DataFrame(ldhdata.eval_data[:len(newouts)], columns=['response', 'observed'])
# Plain column assignment; `df[['observed']] = list` raises
# "Columns must be same length as key" on current pandas releases.
df['observed'] = newouts
df





  0%|          | 0/502 [00:00<?, ?it/s][A[A[A[A



  0%|          | 1/502 [00:00<01:22,  6.08it/s][A[A[A[A



  0%|          | 2/502 [00:00<01:44,  4.78it/s]


Unnamed: 0,response,observed
0,this is a photo of a man and woman who look li...,0.542543
1,the man and woman a. this photo is of a child ...,0.505585
2,the child has a pained expression on its face ...,0.495090
3,this is two hands holding a small gun.,0.245818
4,"the hands have long nails, and the gun is eith...",0.358046
...,...,...
123,A man has a deformed face from a chemical acci...,0.535275
124,A man who was once a psychopath has a disgusti...,0.523399
125,This child suffers from a stab one that happen...,0.450596
126,A boy is at the dentist.,0.453862


## Predictions on Study 1 Data

In [None]:
# Predict a score for the Study 1 (training split) responses.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Evaluation mode: without it, layers such as dropout stay active and the
# predictions change from run to run.
model.eval()

outs = []
with torch.no_grad():  # Inference only -- no autograd graph is needed.
    for idx, batch in enumerate(tqdm(train_dl)):
        # In debug mode only the first two batches are scored.
        if DEV_FLAG and idx >= 2:
            break
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        out = model(input_ids, attention_mask)
        # One value per example: collapse dim 1 of the model output.
        outs.append(out.sum(dim=1).cpu().tolist())

# Flatten the per-batch lists into a single list of predictions.
newouts = [prediction for batch_predictions in outs for prediction in batch_predictions]

# `train_dl` iterates the *train split* produced by `train_test_split` (which
# shuffles by default), not `ldhdata.train_data` itself. Take the rows from
# `split_data['train']` so each prediction is paired with the response and
# score it was actually computed from.
df = pd.DataFrame(split_data['train'][:len(newouts)], columns=['response', 'score', 'observed'])
# Plain column assignment; `df[['observed']] = list` raises
# "Columns must be same length as key" on current pandas releases.
df['observed'] = newouts
df





  0%|          | 0/164 [00:00<?, ?it/s][A[A[A[A



  1%|          | 1/164 [00:00<00:28,  5.77it/s][A[A[A[A



  1%|          | 2/164 [00:00<00:28,  5.69it/s][A[A[A[A

Unnamed: 0,response,score,observed
0,Like he's looking out and trying to think abo...,1.75,0.168645
1,A man has a knife close to the throat of a wo...,6.50,0.143740
2,He is in a kitchen that is kind of bare.,4.00,0.169842
3,A man is staring at the camera and looks upset.,2.75,0.106481
4,A woman is holding a child.,6.00,0.122527
...,...,...,...
123,"His finger was on the trigger, ready to fire.",5.25,0.275535
124,also could be gooby from meet the robinsons l...,2.50,0.138392
125,I love to see a child reading and she already...,1.50,0.060412
126,I don't really feel anything to this because ...,1.75,0.158868
