In [None]:
# `sys` is needed below to extend the module search path.
import sys

# Mount Google Drive in the Colab runtime; force_remount=True is passed so an
# already-mounted drive is mounted again.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Put the project checkout on the import path so the `config` module resolves.
sys.path.append("/content/drive/MyDrive/DL4NLP/abstract-to-title-generation")
# NOTE(review): star import — later cells use PROJECT_ROOT and DATA_DIR, which
# presumably come from here; confirm and consider importing them explicitly.
from config import *

In [None]:
# `!cd` runs in a throw-away subshell and leaves the kernel's working directory
# untouched, so change directory from Python instead.
import os

os.chdir(PROJECT_ROOT)
# Make the project's `src` package directory importable (model_utils, dataset_utils).
sys.path.append(f"{PROJECT_ROOT}/src")

In [None]:
!pip install -r "requirements.txt" -f &> /dev/null

In [None]:
#!dvc pull -f

In [None]:
import pandas as pd
import numpy as np
import torch
import datasets
from torch.utils.data import Dataset, DataLoader
from datasets import Dataset
from tqdm import trange 
from transformers import AutoConfig, AutoTokenizer
import torch.nn as nn
from torch import optim
from transformers import BertModel,BertPreTrainedModel
import torch.nn as nn
from scipy import stats
import os
from pathlib import Path
import matplotlib.pyplot as plt
import math
import time
import datetime
import model_utils
import dataset_utils

In [None]:
## Model Configurations
p = {
    'max_len': 512,          # maximum sequence length handed to the dataset builders
    'batch_size': 6,
    'lr': 4.0638e-05,        # learning rate for the Adam optimizers
    'epochs': 18,
    'dropout': 0.5,
    'num_threads': 1,
    'model_name': 'allenai/scibert_scivocab_uncased',
    #'model_name': 'bert-base-uncased',
    'do_train': True,
    'random_seed': 24
}

# The seed above was configured but never applied in any visible cell; seed the
# Python, NumPy and PyTorch generators so a Restart & Run All is repeatable.
import random

random.seed(p['random_seed'])
np.random.seed(p['random_seed'])
torch.manual_seed(p['random_seed'])

## Fine Tuning

In [None]:
## Tokenizer matching the selected pretrained checkpoint
tokenizer = AutoTokenizer.from_pretrained(p['model_name'])
## Checkpoint configuration, handed to the regressor below
aconfig = AutoConfig.from_pretrained(p['model_name'])
## Regression model created from the selected transformer checkpoint
model = model_utils.BertRegresser.from_pretrained(
    p['model_name'],
    config=aconfig,
)

In [None]:
# Freeze every parameter except those whose name contains one of the
# substrings below (the BERT pooler and the `regressor.1` layer).
unfreeze_layers = ['bert.pooler', 'regressor.1']
for name, params in model.named_parameters():
  params.requires_grad = any(ele in name for ele in unfreeze_layers)

# List the parameters that remain trainable, with their shapes.
trainable = [
  (name, params.size())
  for name, params in model.named_parameters()
  if params.requires_grad
]
for name, size in trainable:
  print(name, size)

In [None]:
## Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
## Putting model to device
model = model.to(device)
## Mean squared error between the predicted and the annotated score
criterion = nn.MSELoss()
## Optimizer
optimizer = optim.Adam(
    params=model.parameters(),
    lr=p['lr'],
)

### Generate training data

In [None]:
# Annotated samples used to fit the quality regressor.
annotations_path = f'{DATA_DIR}/annotated/dataset_230samples.json'
annotations = pd.read_json(annotations_path)

# gen_datasets returns three loaders, unpacked as train / dev / test.
loaders = dataset_utils.gen_datasets(
    tokenizer, annotations, p["max_len"], p["batch_size"], p["num_threads"]
)
train_loader, dev_loader, test_loader = loaders

### Training

In [None]:
# Do Train (do not use this for training of reward model, reward model trained using ray tune)

if p['do_train']:
  # Keep the return value: the statistics cell below reads `training_stats`,
  # which was not assigned in any visible cell.
  # NOTE(review): assumes model_utils.train returns per-epoch rows of
  # (episode, accuracy, val_loss) — confirm against model_utils.
  training_stats = model_utils.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=dev_loader,
    epochs=p['epochs'],
    device=device,
  )


### Save model checkpoint


In [None]:
# Tabulate the collected training statistics and render them.
stats_columns = ["episode", "accuracy", "val_loss"]
stats_df = pd.DataFrame(np.array(training_stats), columns=stats_columns)
display(stats_df)

In [None]:
# `df_len` is not defined in any visible cell; tag the run folder with the
# number of annotated samples that were loaded for training.
df_len = len(annotations)
timestamp = datetime.datetime.now().strftime('%Y-%m-%d__%H_%M_%S')

save_folder = f"{PROJECT_ROOT}/reward_model/finetuned_size{df_len}_ep{p['epochs']}_{timestamp}"
save_file = "model.pth"
save_path = f"{save_folder}/{save_file}"

Path(save_folder).mkdir(parents=True, exist_ok=True)

# Persist the weights and the training statistics side by side.
torch.save(model.state_dict(), save_path)
stats_df.to_csv(f"{save_folder}/stats.csv")

### Load best reward model

In [None]:
# `save_path` already is the full path of the state dict written by the save
# cell, so prefixing it with PROJECT_ROOT again (and appending "checkpoint")
# pointed at a file that cannot exist. Load the saved state dict directly.
# NOTE(review): a checkpoint stored as a (model_state, optimizer_state) tuple
# would need its own directory path here — confirm which artifact is intended.
model.load_state_dict(torch.load(save_path, map_location=device))

In [None]:
# Rebuild the regressor and restore previously fine-tuned weights into it.
model = model_utils.BertRegresser.from_pretrained(p['model_name'], config=aconfig)

# NOTE(review): hard-coded run folder; its name does not follow the timestamp
# format used by the save cell — confirm the folder exists.
model_path = f"{PROJECT_ROOT}/reward_model/finetuned_size230_ep18_2022-08-08__15_59_58_05/model.pth"

# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime.
model.load_state_dict(torch.load(model_path, map_location=device))
# Assigning the result keeps the cell from printing the whole module tree.
model = model.to(device)


## Display Correlation

In [None]:
def predict(model, dataloader, device):
    """Run ``model`` over ``dataloader`` without gradients and collect the results.

    The model is switched to evaluation mode for the duration of the call so
    that layers such as dropout behave deterministically; its previous
    train/eval mode is restored afterwards.

    Args:
        model: module called as ``model(input_ids, attention_mask)``.
        dataloader: iterable yielding ``(input_ids, attention_mask, target)`` batches.
        device: device every batch tensor is moved to before the forward pass.

    Returns:
        ``(predicted_label, actual_label)``: two lists holding one tensor per
        sample (each batch is split along its first dimension).
    """
    predicted_label = []
    actual_label = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for input_ids, attention_mask, target in dataloader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                target = target.to(device)

                output = model(input_ids, attention_mask)

                # Extending a list with a tensor appends its rows one by one.
                predicted_label.extend(output)
                actual_label.extend(target)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return predicted_label, actual_label

def display_correlation(model, loader, device):
    """Print the Spearman correlation between the model's predictions and the labels.

    Predictions and labels are gathered with ``predict``, converted to NumPy
    arrays with singleton dimensions squeezed away, and passed to
    ``scipy.stats.spearmanr``; the first element of its result is printed.
    """
    def as_array(tensors):
        # One NumPy entry per collected tensor, singleton dimensions removed.
        return np.array([t.cpu().data.numpy() for t in tensors]).squeeze()

    predictions, gold = predict(model, loader, device)
    rho = stats.spearmanr(as_array(predictions), as_array(gold))[0]
    print(rho)

In [None]:
# Report the correlation on each split, in train / dev / test order.
for split_name, split_loader in (
    ("Train", train_loader),
    ("Dev", dev_loader),
    ("Test", test_loader),
):
    print(split_name)
    display_correlation(model, split_loader, device)

## Train Humor Model

In [None]:
# Humor annotations; the regressor loaded above acts as the quality scorer.
annotations = pd.read_csv(f'{DATA_DIR}/humor/quirky_annotated.csv')
quality_model = model

# annotate quality score with quality_model
df = dataset_utils.gen_humor_dataframe(
    tokenizer, quality_model, device, annotations, p["max_len"], p["num_threads"]
)

display(df)

# Separate regressor and tokenizer for the humor task, built from the same checkpoint.
humor_model = model_utils.HumorBertRegresser.from_pretrained(
    p['model_name'],
    config=aconfig,
)
#humor_model.load_state_dict(torch.load(model_path))
humor_tokenizer = AutoTokenizer.from_pretrained(p['model_name'])
humor_model.to(device)
humor_tokenizer, humor_model = dataset_utils.add_humor_token(humor_tokenizer, humor_model)

# NOTE: this rebinds train_loader / dev_loader / test_loader to the humor data.
humor_loaders = dataset_utils.gen_humor_datasets(
    humor_tokenizer, df, p["max_len"], p["num_threads"]
)
train_loader, dev_loader, test_loader = humor_loaders

## HUMOR Optimizer
humor_optimizer = optim.Adam(params=humor_model.parameters(), lr=p['lr'])

# Freeze everything except parameters whose name contains one of these substrings.
unfreeze_layers = ['bert.pooler', 'regressor.1']
for name, params in humor_model.named_parameters():
  params.requires_grad = any(ele in name for ele in unfreeze_layers)

# List the parameters that remain trainable, with their shapes.
trainable = [
  (name, params.size())
  for name, params in humor_model.named_parameters()
  if params.requires_grad
]
for name, size in trainable:
  print(name, size)

In [None]:
def evaluate_humor(model, criterion, dataloader, device):
    """Evaluate a two-target regressor and return its per-target Spearman correlations.

    The model is put in evaluation mode and run over ``dataloader`` without
    gradients. Predictions and targets of target column 0 are plotted against
    each other, with the mean batch loss shown in the plot title.

    Args:
        model: module called as ``model(input_ids, attention_mask)``.
        criterion: loss applied to ``(output, target)`` for every batch.
        dataloader: loader whose ``dataset.humor`` attribute is truthy and which
            yields ``(input_ids, attention_mask, target)`` batches.
        device: device the batch tensors are moved to.

    Returns:
        A list with the Spearman correlation between predictions and targets
        for target column 0 and target column 1, in that order.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    assert dataloader.dataset.humor
    model.eval()

    total_loss, n_batches = 0.0, 0
    preds, lst_label = [], []

    with torch.no_grad():
        for input_ids, attention_mask, target in dataloader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            target = target.to(device)

            output = model(input_ids, attention_mask)
            preds += output.cpu().tolist()
            lst_label += target.cpu().tolist()
            total_loss += criterion(output, target.type_as(output)).item()
            n_batches += 1

    if n_batches == 0:
        # Indexing the empty arrays below would otherwise fail with an opaque IndexError.
        raise ValueError("evaluate_humor received a dataloader without batches")

    preds = np.array(preds)
    lst_label = np.array(lst_label)

    # Labeled plot so predictions and targets can be told apart.
    fig, axes = plt.subplots()
    axes.plot(preds[:, 0], label="predicted")
    axes.plot(lst_label[:, 0], label="target")
    axes.set_title(f"Target column 0 (mean loss {total_loss / n_batches:.4f})")
    axes.set_xlabel("sample")
    axes.set_ylabel("score")
    axes.legend()
    plt.show()
    plt.close(fig)  # this runs once per epoch; do not accumulate open figures

    def column_corr(col):
        # Spearman correlation between predictions and targets of one target column.
        return stats.spearmanr(preds[:, col], lst_label[:, col])[0]

    return [column_corr(col) for col in (0, 1)]

def train_humor(model, criterion, optimizer, train_loader, val_loader, epochs, device):
    """Train ``model`` for ``epochs`` epochs and evaluate it after each one.

    Both loaders must wrap datasets whose ``humor`` attribute is truthy. After
    every epoch the mean training loss is printed, followed by the
    correlations returned by ``evaluate_humor`` on ``val_loader``.
    """
    for loader in (train_loader, val_loader):
        assert loader.dataset.humor

    for epoch in trange(epochs, desc="Epoch"):
        model.train()
        train_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids, attention_mask, target = (tensor.to(device) for tensor in batch)

            prediction = model(input_ids, attention_mask)
            batch_loss = criterion(prediction, target.type_as(prediction))
            batch_loss.backward()
            optimizer.step()

            train_loss += batch_loss.item()

        print(f"Training loss is {train_loss/len(train_loader)}")
        correlations = evaluate_humor(
            model, criterion=criterion, dataloader=val_loader, device=device
        )
        print("Epoch {} complete! Correlations : {}".format(epoch, correlations))

In [None]:
if p['do_train']:
    # Use the optimizer built over humor_model's parameters. The original passed
    # `optimizer`, which holds the quality model's parameters, so every step
    # left the humor model's weights unchanged.
    train_humor(
        model=humor_model,
        criterion=criterion,
        optimizer=humor_optimizer,
        train_loader=train_loader,
        val_loader=dev_loader,
        epochs=p['epochs'],
        device=device,
    )

In [None]:
#stats.spearmanr(np.array([[0.16607934, 0.08131046, -0.50477946, 0.19450632, -0.10593899],[0.13920161, 0.07102472, 0.25097752, 0.09419987, 0.03158583]]), axis=1)




### Evaluate Humor

In [None]:
# The humor regressor has two target columns (evaluate_humor indexes columns 0
# and 1). display_correlation passes the two-column arrays straight to
# spearmanr, which then prints a cross-correlation matrix rather than one
# value per target. evaluate_humor returns the per-target correlations.
# NOTE(review): assumes test_loader.dataset.humor is set like on the other splits.
for split_name, split_loader in (
    ("Train", train_loader),
    ("Dev", dev_loader),
    ("Test", test_loader),
):
    print(split_name)
    print(evaluate_humor(humor_model, criterion, split_loader, device))