# Reappraisal Training on PyTorch Lightning

## Setup
- When running on Google Colab, mount Google Drive to access scripts.
- `cd` into the project root and install the dependencies before running the cells below.

In [None]:
# Load IPython's autoreload extension so the `%autoreload` magics used in later cells are available.
%load_ext autoreload

In [None]:
import torch

# Notebook-wide settings shared by the data-loading and training cells.
STRAT = "obj"       # forwarded as `strat=` to LDHDataModule and kfold_train
BATCH_SIZE = 64     # forwarded as `batch_size=` to the DataLoaders
NUM_FOLDS = 5       # first argument of kfold_train

## Load LDH Data

Contains the following:

- LDHI
- LDHII

In [None]:
from reappraisalmodel.ldhdata import LDHDataModule
# Build the data module for the configured strategy, then load the training
# split followed by the evaluation split (same order as before).
ldhdata = LDHDataModule(strat=STRAT, data_dir='.')
for load_split in (ldhdata.load_train_data, ldhdata.load_eval_data):
    load_split()

Loading cached processed dataset at output/training/obj/cache-50ed3c54936a704e.arrow


Training data loaded from disk.
Encoding Training Data:
Evaluation data loaded from disk.
Encoding Test Data



HBox(children=(FloatProgress(value=0.0, max=32109.0), HTML(value='')))

## Run K-Fold Training


In [None]:
%autoreload 2
from reappraisalmodel.trainers import kfold_train
# K-fold training run; `results` feeds the summary cell that follows.
# The author's disabled smoke-test options were limit_train_batches=2 and
# limit_val_batches=1 (add them to the dict below to re-enable).
kfold_kwargs = dict(strat=STRAT, max_epochs=15)
results = kfold_train(NUM_FOLDS, ldhdata, **kfold_kwargs)


In [None]:
import pandas as pd

# Tabulate the k-fold results. The two metric columns hold objects exposing
# `.item()` (presumably scalar tensors -- confirm against kfold_train), so
# unwrap them to plain Python numbers before summarising.
df = pd.DataFrame(results)
for metric in ('r2score', 'explained_var'):
    df[metric] = df[metric].apply(lambda value: value.item())
df.describe()

## Tuning Hyperparameters


In [None]:
# export
%autoreload
import torch
import pytorch_lightning as lit
from pytorch_lightning.loggers import TensorBoardLogger
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray import tune

from reappraisalmodel.lightningreapp import LightningReapp

# Search space sampled by Ray Tune for each trial.
default_tune_config = {
    # loguniform samples by magnitude (uniform in log space) between 1e-4 and 1e-1.
    "lr": tune.loguniform(1e-4, 1e-1),
    # tune.randint is lower-inclusive / upper-exclusive. The lower bound is 1
    # (it used to be 0) so a trial can never request a zero-width hidden layer.
    "hidden_layer_size": tune.randint(1, 50),
}

# Each time validation ends, report the Lightning metric "val_loss" to Tune
# under the name "loss".
callback_tuner = TuneReportCallback(
    {"loss": "val_loss"},
    on="validation_end",
)

### TUNING HYPERPARAMETERS
def train_tune(config, ldhdata, num_gpus=None, num_epochs=10):
    """Train a single LightningReapp trial; used as the Ray Tune trainable.

    Args:
        config: Hyperparameter dict handed to ``LightningReapp``.
        ldhdata: Data object passed as the second argument to ``trainer.fit``.
        num_gpus: Forwarded to ``Trainer(gpus=...)``.
        num_epochs: Maximum number of epochs the trial may train for.
    """
    model = LightningReapp(config)
    print("Running tune")
    trainer = lit.Trainer(
        # num_epochs used to be accepted but silently ignored; wire it through
        # so callers actually control the trial length.
        max_epochs=num_epochs,
        # Each epoch is restricted to one training and one validation batch.
        limit_train_batches=1,
        limit_val_batches=1,
        gpus=num_gpus,
        # Module-level callback that reports validation metrics back to Tune.
        callbacks=[callback_tuner],
    )
    trainer.fit(model, ldhdata)

# Launch the search. `metric`/`mode` must be supplied so that
# `analysis.best_config` knows which reported value to rank trials by;
# "loss" is the name `callback_tuner` reports val_loss under.
analysis = tune.run(
    tune.with_parameters(train_tune, ldhdata=ldhdata, num_epochs=1),
    metric="loss",
    mode="min",
    config=default_tune_config,
    num_samples=2,
)
print("Best hyperparameters found were: ", analysis.best_config)


In [None]:
%autoreload
import pytorch_lightning as lit

from reappraisalmodel.lightningreapp import LightningReapp

# Quick end-to-end check of the model/data wiring using fast_dev_run.
dev_run_config = dict(lr=1e-3, hidden_layer_size=50)
model = LightningReapp(dev_run_config)

trainer = lit.Trainer(fast_dev_run=1)
trainer.fit(model, ldhdata)


## Extra!

In [None]:
%autoreload
from reappraisalmodel.lightningreapp import LightningReapp
# Restore a trained model from the checkpoint stored in S3.
# NOTE(review): the variable is called `objmodel` but the checkpoint file name
# starts with "far" -- confirm this is the intended checkpoint.
far_checkpoint_uri = "s3://ldhdata/backup/far-0224-epoch=2-step=2021.ckpt"
objmodel = LightningReapp.load_from_checkpoint(far_checkpoint_uri)


In [None]:
from tqdm import tqdm

# NOTE(review): `objdl` is not defined in any cell visible in this notebook;
# it must already exist in the kernel (a DataLoader yielding dicts with
# 'input_ids' and 'attention_mask') -- define it explicitly before sharing.
objmodel.cuda()
objmodel.eval()
objouts = []
# Inference only: disable autograd so no graph is built, and move each batch of
# predictions to the CPU so GPU memory is released as the loop progresses.
with torch.no_grad():
    for batch in tqdm(objdl):
        input_ids = batch['input_ids'].cuda()
        attention_mask = batch['attention_mask'].cuda()
        out = objmodel(input_ids, attention_mask)
        objouts.append(out.sum(dim=1).cpu())
# Summarise instead of dumping every prediction tensor into the cell output.
print(f"Collected {len(objouts)} batches of predictions")

In [None]:
%autoreload
import datetime
import torch
import pytorch_lightning as lit
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from reappraisalmodel.lightningreapp import LightningReapp


model = LightningReapp()

# Keep the three best checkpoints according to validation loss.
modelcheckpoint = ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    save_top_k=3,
    verbose=True
    )

# Keep the three best checkpoints according to the 'loss_distance' metric.
modelcheckpoint_loss_dist = ModelCheckpoint(
    monitor='loss_distance',
    mode='min',
    save_top_k=3,
    verbose=True
)

# `earlystopping` was listed in the Trainer callbacks but never created, so the
# cell raised NameError on a fresh kernel. Stop when val_loss stops improving
# (library-default patience).
earlystopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
)

# Expose only the columns the model consumes, as torch tensors, then hold out
# 20% of the training data for validation.
ldhdata.train_data.set_format(type='torch', columns=['score', 'input_ids', 'attention_mask'])
data = ldhdata.train_data.train_test_split(test_size=0.2)
train_data = data['train']
val_data = data['test']
train_dl = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=4,shuffle=True)
val_dl = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Timestamped logger version so repeated runs do not overwrite each other.
today = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
trainer = lit.Trainer(
    logger = TensorBoardLogger("lightning_logs", name="reapp_model", version="_".join([STRAT,today])),
    precision=16,
    val_check_interval=0.25,  # validate four times per training epoch
    callbacks=[modelcheckpoint, earlystopping, modelcheckpoint_loss_dist],
    gpus=1)

results = trainer.fit(model, train_dl, val_dl)



GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.

  | Name          | Type              | Params
----------------------------------------------------
0 | bert          | DistilBertModel   | 66.4 M
1 | classifier    | Sequential        | 38.8 K
2 | train_loss    | MeanSquaredError  | 0     
3 | val_loss      | MeanSquaredError  | 0     
4 | r2score       | R2Score           | 0     
5 | explained_var | ExplainedVariance | 0     
----------------------------------------------------
38.8 K    Trainable params
66.4 M    Non-trainable params
66.4 M    Total params
265.607   Total estimated model params size (MB)
Epoch 0, global step 40: val_loss reached 1.90197 (best 1.90197), saving model to "lightning_logs/reapp_model/obj_20210306_184726/checkpoints/epoch=0-step=40.ckpt" as top 3
Epoch 0, global step 40: loss_distance reached 4.07076 (best 4.07076), saving model to "lightning_logs/reapp_model/obj_20210306_184726/checkpoints/epoch=0-step

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

In [None]:
# Run the trained model over the evaluation split and persist the metrics.
ldhdata.eval_data.set_format(type='torch', columns=['input_ids', 'attention_mask'])
eval_data = ldhdata.eval_data
eval_dl = DataLoader(eval_data, batch_size=BATCH_SIZE, num_workers=4,shuffle=True)

results = trainer.test(model, eval_dl)

import pickle
# The body of the `with` block must be indented; previously the cell failed to
# parse. Write-only binary mode is sufficient for dumping.
with open("results_obj", "wb") as f:
    pickle.dump(results, f)

1

In [None]:
import datetime
import torch
import pytorch_lightning as lit
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from reappraisalmodel.lightningreapp import LightningReapp
from reappraisalmodel.ldhdata import LDHDataModule


# Define constants
STRAT = 'obj'
BATCH_SIZE = 64
NUM_FOLDS=5

# Load the training and evaluation splits for the chosen strategy.
ldhdata = LDHDataModule(data_dir='.', strat=STRAT)
ldhdata.load_train_data()
ldhdata.load_eval_data()

model = LightningReapp()

# Keep the three best checkpoints according to validation loss.
modelcheckpoint = ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    save_top_k=3,
    verbose=True
    )

# Expose only the columns the model consumes, as torch tensors, then hold out
# 20% of the training data for validation.
ldhdata.train_data.set_format(type='torch', columns=['score', 'input_ids', 'attention_mask'])
data = ldhdata.train_data.train_test_split(test_size=0.2)
train_data = data['train']
val_data = data['test']
train_dl = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=4,shuffle=True)
val_dl = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Timestamped logger version so repeated runs do not overwrite each other.
today = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
trainer = lit.Trainer(
    logger = TensorBoardLogger("lightning_logs", name="reapp_model", version="_".join([STRAT,today])),
    precision=16,
    max_epochs=30,
    val_check_interval=0.25,  # validate four times per training epoch
    # The checkpoint callback above was constructed but never handed to the
    # Trainer, so its monitor/save_top_k settings had no effect.
    callbacks=[modelcheckpoint],
    gpus=1)

results = trainer.fit(model, train_dl, val_dl)

# Score the evaluation split with the trained model.
ldhdata.eval_data.set_format(type='torch', columns=['input_ids', 'attention_mask'])
eval_data = ldhdata.eval_data
eval_dl = DataLoader(eval_data, batch_size=BATCH_SIZE, num_workers=4,shuffle=True)

results = trainer.test(model, eval_dl)

# Persist the test metrics for later inspection.
import pickle
with open("results_obj", "wb+") as f:
    pickle.dump(results, f)

Loading cached processed dataset at output/training/obj/cache-50ed3c54936a704e.arrow
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.

  | Name          | Type              | Params
----------------------------------------------------
0 | bert          | DistilBertModel   | 66.4 M
1 | classifier    | Sequential        | 38.8 K
2 | train_loss    | MeanSquaredError  | 0     
3 | val_loss      | MeanSquaredError  | 0     
4 | r2score       | R2Score           | 0     
5 | explained_var | ExplainedVariance | 0     
----------------------------------------------------
38.8 K    Trainable params
66.4 M    Non-trainable params
66.4 M    Total params
265.607   Total estimated model params size (MB)


Training data loaded from disk.
Encoding Training Data:
Evaluation data loaded from disk.
Encoding Test Data




HBox(children=(FloatProgress(value=0.0, max=32109.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

In [None]:
import pandas as pd

# Convert every per-batch prediction tensor to a plain list, then flatten the
# batches into one list with a single entry per predicted row.
cpuouts = list(map(lambda ten: ten.detach().cpu().tolist(), objouts))
newouts = [value for batch in cpuouts for value in batch]

# Drop the tensor formatting so the dataset can be turned into a DataFrame.
objdata = ldhdata.train_data
objdata.reset_format()

# Keep the response text and score, and attach the model output as 'observed'.
objdf = pd.DataFrame(objdata, columns=['response', 'score'])
objdf[['observed']] = newouts


In [None]:
# Write the scored responses to disk.
scored_csv_path = "./study1full_scored_far.csv"
objdf.to_csv(scored_csv_path)

In [None]:
farmodel = LightningReapp.load_from_checkpoint("s3://ldhdata/backup/far-0224-epoch=2-step=2021.ckpt")
# NOTE(review): `farldhdata` is loaded here but the DataLoader below iterates
# `ldhdata.train_data`, as does the cell that builds `far_df`. Confirm which
# dataset is intended and switch both places together if it should be farldhdata.
farldhdata = LDHDataModule(data_dir='.', strat='far')
farldhdata.load_train_data()
ldhdata.train_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'score'])
# shuffle must be off: the predictions are later written back onto the first
# len(newouts) rows of the dataset by position, so batch order has to match
# dataset order.
fardl = torch.utils.data.DataLoader(ldhdata.train_data,batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)
farouts = []
farmodel.cuda()
print("Sent model to GPU")
farmodel.eval()

In [None]:
# Score only the first 10 batches. Autograd is disabled because this is pure
# inference and the outputs are only collected, never back-propagated.
with torch.no_grad():
    for idx, batch in enumerate(tqdm(fardl)):
        if idx >= 10:
            break
        input_ids = batch['input_ids'].cuda()
        attention_mask = batch['attention_mask'].cuda()
        farout = farmodel(input_ids, attention_mask)
        # Append this batch's own output; the previous code appended the stale
        # `out` variable left over from an earlier cell.
        farouts.append(farout.sum(dim=1).to('cpu'))

In [None]:
import pandas as pd

# Convert every per-batch prediction tensor to a plain list, then flatten the
# batches into one list with a single entry per predicted row.
cpuouts = list(map(lambda ten: ten.detach().cpu().tolist(), farouts))
newouts = [value for batch in cpuouts for value in batch]

# Drop the tensor formatting so rows come back as plain Python values.
traindata = ldhdata.train_data
traindata.reset_format()

# Take as many leading rows as there are predictions and attach the
# predictions as the 'observed' column.
scored_rows = ldhdata.train_data[:len(newouts)]
far_df = pd.DataFrame(scored_rows, columns=['response', 'score', 'observed'])
far_df[['observed']] = newouts

In [None]:
# Write the scored subset to disk.
far_df.to_csv(path_or_buf='study1subset_scored_far.csv')

In [None]:
# hide
# NOTE(review): `tokenizer` is not defined in any cell visible in this
# notebook; it must already exist in the kernel for this demo to run.
sample_sentences = [
    "This is the first test sentence!",
    "This is the second, better test sentence.",
]
# Returns a BatchEncoding of the text.
tokenized = tokenizer(text=sample_sentences, padding='max_length', max_length=150)

# Show, per sentence, the token strings and the matching attention mask.
for idx, sent in enumerate(tokenized.input_ids):
    tokens = tokenizer.convert_ids_to_tokens(sent)
    print(f"Sentence            {idx}: {tokens}")
    mask = tokenized[idx].attention_mask
    print(f"Tokenized Attention {idx}: {mask}")



In [None]:
%autoreload
import torch
import pytorch_lightning as lit
from reappraisalmodel.lightningreapp import LightningReapp

# Hyperparameters for the smoke-test run (the assignment used to be doubled:
# `default_config = default_config = {...}`).
default_config = {
    'lr': 1e-3,
    'hidden_layer_size': 50
}

trainer = lit.Trainer(
    gpus = 1 if torch.cuda.is_available() else None,
    gradient_clip_val=1.0,
    progress_bar_refresh_rate=30,
    max_epochs=10,
    fast_dev_run=2,  # debugging mode: run only 2 batches
    terminate_on_nan=True)

# Build the model once; it was previously constructed twice and the first
# instance was discarded unused.
model = LightningReapp(default_config)

trainer.fit(model, ldhdata.train_dataloader(), ldhdata.val_dataloader())

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Running in fast_dev_run mode: will run a full train, val and test loop using 2 batch(es).

  | Name       | Type             | Params
------------------------------------------------
0 | bert       | DistilBertModel  | 66.4 M
1 | classifier | Sequential       | 38.8 K
2 | train_loss | MeanSquaredError | 0     
3 | val_loss   | MeanSquaredError | 0     
------------------------------------------------
38.8 K    Trainable params
66.4 M    Non-trainable params
66.4 M    Total params
265.607   Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

{'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[ 101, 2045, 2003,  ...,    0,    0,    0],
        [ 101, 2045, 2003,  ...,    0,    0,    0],
        [ 101, 1996, 2775,  ...,    0,    0,    0],
        ...,
        [ 101, 2009, 3504,  ...,    0,    0,    0],
        [ 101, 3745, 2062,  ...,    0,    0,    0],
        [ 101, 2010, 2970,  ...,    0,    0,    0]], device='cuda:0'), 'score': tensor([3.7500, 1.5000, 2.0000, 1.6667, 2.0000, 2.0000, 2.2000, 1.0000, 1.6667,
        1.6667, 3.0000, 2.6000, 2.5000, 1.8000, 1.0000, 2.3333],
       device='cuda:0')}
{'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
       

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

1