In [None]:
from google.colab import drive
drive.mount('/content/drive/')
%cd "/content/drive/MyDrive/DL4NLP/abstract-to-title-generation/"

from config import *
import sys

In [None]:
!pip install -r "{PROJECT_ROOT}/requirements.txt" &> /dev/null

In [None]:
# Make the project root, its source tree and the vendored dependencies importable.
for _subdir in (
    '',
    '/src',
    '/deps/BARTScore',
    '/deps/bert_score',
    '/RL_bart_xsum/trl/trl',
    '/deps/emnlp19-moverscore-master',
):
  sys.path.append(f'{PROJECT_ROOT}{_subdir}')

In [None]:
from transformers import AutoModelForSeq2SeqLM, BartTokenizer, BartModel, BartForConditionalGeneration, BartConfig, GPT2Config,GPT2LMHeadModel
from transformers import top_k_top_p_filtering, GPT2Model
from transformers import GPT2Tokenizer, AutoModel, BartTokenizer, AutoTokenizer
from transformers import BertModel, BertPreTrainedModel, AutoConfig
import torch
from torch import nn
from torch.nn import Identity
import torch.nn.functional as F
import sys
from bart_xsum import ValueHead
import ppo
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
from threading import active_count
import sys
from bart_score import BARTScorer
import bert_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
import model_utils
import dataset_utils
import random

In [None]:
# `dvc pull` only downloads data that has changed since the last pull
#!dvc pull -f

## Code Section

In [None]:
def setup_seed(seed):
  """Seed every random number generator this notebook relies on.

  Seeds Python's `random` (used to pick a humor level per sample), NumPy and
  PyTorch (CPU and all CUDA devices), and requests deterministic cuDNN kernels.

  Args:
    seed: Integer seed shared by all generators.
  """
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  # The flag is spelled `deterministic`; assigning to a misspelled attribute
  # only creates a new, ignored attribute on the module.
  torch.backends.cudnn.deterministic = True
setup_seed(57)

In [None]:
# Configuration handed to the value head below. `from_pretrained` is required:
# the first positional argument of `BartConfig(...)` is `vocab_size`, so passing
# the checkpoint name positionally stored that string as the vocabulary size
# instead of loading the checkpoint's configuration.
config = BartConfig.from_pretrained('facebook/bart-large-xsum', output_hidden_states=True)
model_name = f"{MODEL_DIR}/BART-XSum-humor/"

print(DEVICE_ID)

# Policy model that is passed to the PPO trainer for updating.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, output_hidden_states=True).to(DEVICE_ID)
# Second copy of the same checkpoint, passed to the PPO trainer as reference model.
ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, output_hidden_states=True).to(DEVICE_ID)

# Tokenizer for the SciBERT-based reward model.
reward_tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
# Tokenizer for the BART policy / reference models.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-xsum')

# Experiment presets. Each entry is
#   (reward_model_dir, reward_model_name, generate_humor, gamma, lam, cliprange)
# where `generate_humor` is a single humor level, a list of levels to sample
# from, or None for the quality-only reward model. The subscript after the
# dict selects the preset used for this run.
reward_model_dir, reward_model_name, generate_humor, gamma, lam, cliprange = {

    0: ("evaluation_models/reward_model", "finetuned_size140_lr4.0638e-05_ep16_2022-08-18__11_34_24_final", 2, 0.99, 0.85, 0.5),
    1: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.99, 0.85, 0.5),

    2: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 0, 0.99, 0.85, 0.5),
    3: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 1, 0.99, 0.85, 0.5),

    4: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 1.0, 0.85, 5.0),
    5: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", [0, 1, 2], 0.99, 0.85, 3.5),
    6: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.99, 0.85, 5.0),
    7: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.98, 0.85, 5.0),
    8: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.99, 0.85, 0.5),

    9: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.99, 0.9, 0.5),
    10: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.995, 0.85, 1.0),
    11: ("evaluation_models/reward_model", "finetuned_size230_lr4.0638e-05_ep16_2022-08-17__16_44_40_final", 2, 0.99, 0.85, 0.5),
    12: ("evaluation_models/reward_model", "only_quality_finetuned_size80_lr4.0638e-05_ep20_2022-08-18__21_29_20_final", None, 0.985, 0.85, 3.5),
    13: ("evaluation_models/reward_model", "only_quality_finetuned_size140_lr4.0638e-05_ep20_2022-08-18__15_05_43_final", None, 0.985, 0.85, 3.5),
    14: ("evaluation_models/reward_model", "only_quality_finetuned_size230_lr4.0638e-05_ep20_2022-08-18__15_17_48_final", None, 0.985, 0.85, 3.5),
    15: ("evaluation_models/reward_model", "only_quality_finetuned_size230_lr4.0638e-05_ep20_2022-08-18__15_17_48_final", None, 0.985, 0.85, 5.0)
}[14]

reward_aconfig = AutoConfig.from_pretrained(model_name)

if generate_humor is not None:
  # Humor-aware reward model: weights come from the selected preset directory.
  reward_model = model_utils.HumorBertRegresser.from_pretrained('allenai/scibert_scivocab_uncased')
  # Load onto the CPU first so the checkpoint does not depend on the device it
  # was saved from; the model is moved to DEVICE_ID after the weights are set.
  model_state = torch.load(f"{PROJECT_ROOT}/{reward_model_dir}/{reward_model_name}/model.pth", map_location="cpu")

  # The humor token is registered on the reward, policy and reference models.
  reward_tokenizer, reward_model = dataset_utils.add_humor_token(reward_tokenizer, reward_model)
  tokenizer, model = dataset_utils.add_humor_token(tokenizer, model)

  ref_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-xsum')
  _, ref_model = dataset_utils.add_humor_token(ref_tokenizer, ref_model)
else:
  reward_model = model_utils.BertRegresser.from_pretrained('allenai/scibert_scivocab_uncased')
  # NOTE(review): this branch ignores reward_model_dir / reward_model_name and
  # always reads `reward_model/checkpoint`, so presets 12-15 all load the same
  # weights -- confirm this is intended.
  model_state = torch.load(f"{PROJECT_ROOT}/reward_model/checkpoint", map_location="cpu")[0]

reward_model.load_state_dict(model_state)
reward_model.to(DEVICE_ID)
# Value head used by the PPO trainer alongside the policy model.
hmodel = ValueHead(config).to(DEVICE_ID)

In [None]:
# load annotated data
# Each row is later converted with `to_numpy()` and read positionally by
# `create_tupel` (fields 0-3), so the column order of this JSON file matters.
anno_sample = pd.read_json(f"{DATA_DIR}/annotated/dataset_230samples.json")

def create_tupel(sample_np, humor):
    """Expand one annotated sample into one row per title and pick the best-scored row.

    Args:
        sample_np: Sequence whose item 0 is the abstract, item 1 the human
            title, item 2 a mapping system name -> generated title and item 3
            a mapping whose values hold the annotation scores.
        humor: Humor level prepended to the abstract as ``[humor=<level>]``,
            or None to leave the abstract unchanged.

    Returns:
        A pair ``(best_row, all_rows)``. ``all_rows`` is a list of six rows
        ``[abstract, human title, title, score, system]`` (the human title
        first, labelled ``'original'``); ``best_row`` is a one-element list
        holding the row with the highest score. All fields are strings because
        the rows pass through a NumPy string array.
    """
    humor_tag = "" if humor is None else f"[humor={humor}]"
    abstract = humor_tag + sample_np[0]
    human_title = sample_np[1]

    systems = ['original', *sample_np[2].keys()]
    titles = [human_title, *sample_np[2].values()]

    # Only every third score value (positions 2, 5, ..., 17) is used, one per title.
    all_scores = list(sample_np[3].values())
    title_scores = [all_scores[pos] for pos in (2, 5, 8, 11, 14, 17)]
    best = title_scores.index(max(title_scores))

    table = np.array([[abstract] * 6, [human_title] * 6, titles, title_scores, systems]).T
    return table[best, :].reshape(1, 5).tolist(), table.tolist()


# Collect, for every annotated sample, the best-scored row (`res`) and all of
# its rows (`res1`) as produced by `create_tupel`.
res = []
res1 = []
for row in anno_sample.to_numpy():
  # A list of humor levels means "pick one at random per sample"; a single
  # level or None is passed through unchanged.
  humor = random.choice(generate_humor) if type(generate_humor) == list else generate_humor

  r1, r2 = create_tupel(row, humor)
  res.extend(r1)
  res1.extend(r2)
gen_title_score_pairs_bestone = np.array(res)
gen_title_score_pairs = np.array(res1)
gen_title_score_pairs_bestone[0]

In [None]:
# Rows keep the score in the second-to-last field and the system name in the
# last one; scores are stored as strings, hence the float() conversions.
scores = np.array([float(s) for s in gen_title_score_pairs_bestone[:,-2]])
mean_score = np.mean(scores)

# Best-per-sample rows scoring at or below the mean.
gen_title_score_pairs_bad = np.array(
    [row for row in gen_title_score_pairs_bestone if float(row[-2]) <= mean_score]
)

# Every row (not only the best ones) produced by the 'bart_xsum' system.
gen_title_score_pairs_xsum = np.array(
    [row for row in gen_title_score_pairs if row[-1] == 'bart_xsum']
)

# Best-per-sample rows scoring at or above the mean, plus any 'bart_xsum' row.
gen_title_score_pairs_good = np.array(
    [
        row for row in gen_title_score_pairs_bestone
        if float(row[-2]) >= mean_score or row[-1] == 'bart_xsum'
    ]
)

In [None]:
import difflib

In [None]:
"""# **2 Train Mode (Cross learning/ Default RL)**"""
# Sampling options shared by the `generate` calls in this notebook.
gen_kwargs = dict(
    min_length=-1,
    top_k=2,
    top_p=0.8,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    # Disabled options kept for reference:
    #   length_penalty=-20.0, num_return_sequences=5, repetition_penalty=1.5
)

def generate_reward(tokenizer, query):
    """Score `query` with the global `reward_model` and return the reward magnitude.

    Args:
        tokenizer: Tokenizer used to encode `query`; callers pass the reward
            model's tokenizer. (Previously this argument was ignored in favour
            of the global `reward_tokenizer`.)
        query: Text to score.

    Returns:
        A 0-dim tensor on the reward model's device holding the norm of the
        reward model's squeezed output.
    """
    # Follow the reward model instead of assuming a fixed 'cuda' device.
    device = next(reward_model.parameters()).device
    encoded = tokenizer(query, return_tensors='pt').to(device)
    # Rewards are only read as plain numbers by the callers, so no autograd
    # graph is needed for the reward model.
    with torch.no_grad():
        reward = reward_model(encoded['input_ids'], encoded['attention_mask']).squeeze()
    # NOTE(review): if the model emits a single value per query, `.norm()` is
    # its absolute value, so the sign of the reward is discarded -- confirm
    # that this is intended.
    return reward.norm()

def ACT_step(title_score_pairs, tokenizer, model, ppo_trainer):
  """Run one PPO update per sample, using a freshly sampled title as the response.

  For every row the abstract is tokenized, a title is sampled from `model`
  with the global `gen_kwargs`, the title is scored by `generate_reward`, and
  `ppo_trainer.step` is called with (abstract ids, title ids, reward).

  Args:
    title_score_pairs: Rows of [abstract, human title, reference title,
      human score, ...]; the score must be convertible with float().
    tokenizer: Tokenizer of `model`.
    model: Sequence-to-sequence policy model being trained.
    ppo_trainer: Trainer exposing `step(query_ids, response_ids, reward)`.

  Returns:
    List of (generated title, reward) tuples, one per input row.
  """
  # Keep inputs on the model's device instead of assuming a fixed 'cuda'.
  device = model.device
  titles = []
  for row in title_score_pairs:
    abstract = row[0]
    ref_title = row[2]
    human_score = float(row[3])
    # The reward model is queried with "[CLS]<abstract>[SEP]<title>".
    query = '[CLS]' + abstract + '[SEP]'

    ab_tensor = tokenizer(abstract, return_tensors="pt").to(device)

    # Reward of the reference title; only reported in the log output below.
    ref_reward = generate_reward(reward_tokenizer, query + ref_title).cpu().item()

    # Sample a title. `max_new_tokens` already bounds its length; the former
    # `[-40:]` slice indexed the batch axis and was a no-op for one abstract.
    gen_title_tensor = model.generate(
        input_ids=ab_tensor["input_ids"],
        attention_mask=ab_tensor["attention_mask"],
        max_new_tokens=40,
        **gen_kwargs
    )

    gen_title = tokenizer.batch_decode(
        gen_title_tensor,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    reward = generate_reward(reward_tokenizer, query + gen_title).cpu().item()

    # PPO update on (abstract, sampled title, reward).
    ppo_trainer.step(ab_tensor["input_ids"], gen_title_tensor, reward)

    if gen_title.strip() != ref_title.strip():
        print(f'Human title:                                     {row[1]}')
        print(f'Reference:                                       {ref_title}\nHuman Score: {human_score}\nReward: {ref_reward}')
        print(f'Generated reference title:                       {gen_title}\n({reward})')
        print('- - - -'*20 + '>')

    titles.append((gen_title, reward))
  return titles

In [None]:
def generate_titles(title_score_pairs, tokenizer, model, titles):
  """Sample a new title per abstract and compare it with a previously recorded one.

  Args:
    title_score_pairs: Rows whose first field is the abstract.
    tokenizer: Tokenizer of `model`.
    model: Sequence-to-sequence model used for generation.
    titles: Sequence of (title, reward) tuples aligned with
      `title_score_pairs`, e.g. the return value of `ACT_step`.

  Returns:
    A pair (gen_titles, scores): `gen_titles` holds
    (index, abstract, recorded title, new title) tuples and `scores` holds
    (recorded reward, new reward) tuples.
  """
  # Keep inputs on the model's device instead of assuming a fixed 'cuda'.
  device = model.device
  scores = []
  gen_titles = []
  for i, row in enumerate(title_score_pairs):
    title, reward = titles[i]
    abstract = row[0]
    # The reward model is queried with "[CLS]<abstract>[SEP]<title>".
    query = '[CLS]' + abstract + '[SEP]'

    ab_tensor = tokenizer(abstract, return_tensors="pt").to(device)

    # `max_new_tokens` already bounds the title length; the former `[-40:]`
    # slice indexed the batch axis and was a no-op for one abstract.
    gen_title_ids = model.generate(
        input_ids=ab_tensor["input_ids"],
        attention_mask=ab_tensor["attention_mask"],
        max_new_tokens=40,
        **gen_kwargs
    )

    gen_title_txt = tokenizer.batch_decode(
        gen_title_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]
    gen_reward = generate_reward(reward_tokenizer, query + gen_title_txt).cpu().item()

    if gen_title_txt.strip() != title.strip():
        print(f'Generated reference title:                       {title}\n({reward})')
        print(f'RL-rewarded-after-step bart_xsum generated title: {gen_title_txt}\nReward: {gen_reward}')
        print('- - - -'*20 + '>')
    gen_titles.append((i, abstract, title, gen_title_txt))
    scores.append((reward, gen_reward))
  return gen_titles, scores

# test setup

In [None]:
"""# **Train Settings and Do Train**"""

# Drop results of any earlier run and release cached GPU memory before training.
new_titles = gen_titles = scores = None
torch.cuda.empty_cache()

# Settings forwarded to `ppo.PPOTrainer`. gamma / lam / cliprange come from the
# experiment preset selected when the models were loaded; the value-function
# clip range reuses the policy clip range.
ppo_config = dict(
    lr=2e-6,  # earlier trials: 1.41e-5, 6e-7, 3e-6
    adap_kl_ctrl=True,
    init_kl_coef=0.2,
    target=6,
    horizon=10000,
    gamma=gamma,
    lam=lam,
    cliprange=cliprange,
    cliprange_value=cliprange,
    vf_coef=.1,
    batch_size=1,
    forward_batch_size=1,
    ppo_epochs=4,
)
ppo_trainer = ppo.PPOTrainer(model, ref_model, hmodel, **ppo_config)
# One PPO step per best-scored sample; returns the sampled titles with their rewards.
new_titles = ACT_step(gen_title_score_pairs_bestone, tokenizer, model, ppo_trainer)

#res = RL_steps(gen_title_score_pairs_bad, 0, len(gen_title_score_pairs_bad), tokenizer, model, ppo_trainer)

In [None]:
# Sample a new title per abstract with the model as it is after the PPO steps and
# pair it with the title / reward that `ACT_step` recorded in `new_titles`.
gen_titles, scores = generate_titles(gen_title_score_pairs_bestone, tokenizer, model, new_titles)

In [None]:
import itertools
import functools

# Output directory for this run, named after the reward model and PPO settings.
dir_path = f"{OUTPUT_DIR}/generated_titles/{reward_model_name}_humor_{generate_humor}_ppo_{gamma}_{lam}_{cliprange}_/"
if os.path.isdir(dir_path):
  # Re-running an experiment: delete files anywhere in the existing tree
  # (directories themselves are kept).
  for p, _dirs, fs in os.walk(dir_path, topdown=False):
    for f in fs:
      os.remove(os.path.join(p, f))
else:
  # `makedirs` also creates a missing `generated_titles` parent; `os.mkdir`
  # raised FileNotFoundError in that case.
  os.makedirs(dir_path, exist_ok=True)

def accum(acc, s):
  """Reduce step that keeps a running sum of `s[1] - s[0]` and its history.

  Args:
    acc: Pair (running total so far, list of all previous running totals).
    s: Pair (old value, new value) for the current item.

  Returns:
    A new pair (updated total, history extended by the updated total); the
    incoming history list is not modified.
  """
  history = acc[1]
  running_total = acc[0] + s[1] - s[0]
  return (running_total, history + [running_total])
  
# Learning curve: running sum of (new reward - recorded reward) over the samples.
# An explicit figure with title and axis labels keeps the saved image readable
# on its own.
fig, ax = plt.subplots()
ax.plot(functools.reduce(accum, scores, (0.0, []))[1])
ax.set(
    title="Cumulative reward difference",
    xlabel="sample index",
    ylabel="cumulative (new_reward - ref_reward)",
)
fig.savefig(f"{dir_path}/learning_curve.png")

# Per-sample rewards: the one recorded earlier and the one of the new title.
df = pd.DataFrame(scores, columns=["ref_reward", "new_reward"])
df.to_csv(f"{dir_path}/scores.csv")

# Per-sample titles matching the rewards above.
df = pd.DataFrame(gen_titles, columns=["index", "abstract", "ref_title", "new_title"])
df.to_csv(f"{dir_path}/titles.csv")

In [None]:
query_txt = "This bachelor thesis explores the generation of title based on a given abstract using neural language model. Recently, neural language models have been used in many scenarios with practical applications. For example, in scientific writing, automatic summary generation from long texts is used to assist in the reading and selection of relevant scientific articles. Title is an important part of scientific article, but the title generation using neural language and optimization for neural language model based on human preferences are less studied. This thesis addresses this gap and presents an optimized model based on state-of-the-art pre-trained neural language model which generate human-preferred titles from a given abstract. The model is fine-tuned on datasets of scientific article and optimized from human preferences using the novel learning perspective in reinforcement learning environment. The result shows that, the neural language model have powerful capabilities on the abstract-to-title task and the reinforcement learning approach is effective in scalable learning of neural language model."

# Tokenize the demo abstract and place it on the same device as the model
# instead of assuming a fixed 'cuda' device.
query_tensor = tokenizer(query_txt, return_tensors="pt").to(model.device)

In [None]:
# Sample a title for the demo abstract with the shared sampling options.
# NOTE(review): the following cell assigns `ref_tensor` again (beam search), so
# this result is only visible if that cell is skipped.
# NOTE(review): `[-40:]` slices the first axis of the `generate` output, not
# the token axis -- presumably a no-op for a single abstract; confirm.
ref_tensor=model.generate(
    input_ids = query_tensor["input_ids"],
    attention_mask = query_tensor["attention_mask"],
    max_new_tokens=40,
    **gen_kwargs
)[-40:].to('cuda')

In [None]:
# Generate a title for the demo abstract with beam search (5 beams, one
# returned sequence) instead of the sampling options in `gen_kwargs`.
ref_tensor = model.generate(
      input_ids = query_tensor["input_ids"],
      attention_mask = query_tensor["attention_mask"],
      max_length = 40,
      num_beams = 5,
      num_return_sequences = 1,
      repetition_penalty=2.0,
      length_penalty=10.0,
      early_stopping = True,
    ).to('cuda')

In [None]:
# Decode the first generated sequence to text (special tokens removed) and display it.
ref_txt = tokenizer.batch_decode(ref_tensor, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
ref_txt


# **Generation**



In [None]:
def _sample_title(generator, encoded):
  """Sample one title from `generator` for a tokenized abstract.

  Returns the generated id tensor and the decoded text of its first sequence.
  """
  title_ids = generator.generate(
      input_ids=encoded["input_ids"],
      attention_mask=encoded["attention_mask"],
      max_new_tokens=40,
      **gen_kwargs
  )[-40:].to('cuda')
  title_txt = tokenizer.batch_decode(
      title_ids,
      skip_special_tokens=True,
      clean_up_tokenization_spaces=False
  )[0]
  return title_ids, title_txt


# For every best-scored sample, generate one title with the reference model
# and one with the PPO-trained model (in that order).
outres = []
for row in gen_title_score_pairs_bestone:
  ab, ht, ogt = row[0], row[1], row[2]
  query_txt = ab
  query_tensor = tokenizer(query_txt, return_tensors="pt").to('cuda')

  # Reference model response.
  ref_tensor, ref_txt = _sample_title(ref_model, query_tensor)
  # Trained model response.
  response_tensor, response_txt = _sample_title(model, query_tensor)

  outres.append([ab, ht, ogt, ref_txt, response_txt])

In [None]:
# One row per abstract with the human title, the best annotated title and the
# titles generated by the reference and the PPO-trained model.
df = pd.DataFrame(outres)
df.columns = ['abstract', 'original title','best title' , 'generated title before RL', 'generated title after RL']
# Forward slash instead of a backslash: in '\act_' the sequence '\a' is the
# BEL control character, which corrupted the file name.
# NOTE(review): nothing visible in this notebook writes to `path` -- confirm
# whether `df.to_csv(path)` is missing.
path = f"{OUTPUT_DIR}/act_{ppo_config['lr']:.9f}_{ppo_config['ppo_epochs']}.csv"
df_np = df.to_numpy()

# Preview of the first 30 rows: abstract, best title, title generated before RL.
n_np = np.array([[row[0], row[2], row[3]] for row in df_np])
se = pd.DataFrame(n_np[:30])
se

# **Analysis**

In [None]:
# %cd /content/drive/MyDrive/Thesis/BARTScore

# BARTScore scorer backed by the `facebook/bart-large-xsum` checkpoint.
# NOTE(review): the device is hard-coded to 'cuda:0' while the models above are
# placed on DEVICE_ID -- confirm both refer to the same GPU.
bart_scorer = BARTScorer(device='cuda:0', checkpoint='facebook/bart-large-xsum')


from moverscore_v2 import get_idf_dict, word_mover_score
from typing import List, Union, Iterable
from collections import defaultdict
import numpy as np

def sentence_score(hypothesis: str, references: List[str], trace=0):
    """Average MoverScore of one hypothesis against a list of references.

    Both IDF dictionaries return 1.0 for every token, i.e. no IDF weighting.

    Args:
        hypothesis: Candidate text.
        references: Reference texts; the hypothesis is compared with each one.
        trace: If greater than 0, print the inputs and the resulting score.

    Returns:
        Mean of the `word_mover_score` values over all references.
    """
    uniform_idf_ref = defaultdict(lambda: 1.)
    uniform_idf_hyp = defaultdict(lambda: 1.)

    # word_mover_score expects one hypothesis per reference.
    repeated_hypothesis = [hypothesis] * len(references)

    mean_score = np.mean(
        word_mover_score(
            references,
            repeated_hypothesis,
            uniform_idf_ref,
            uniform_idf_hyp,
            stop_words=[],
            n_gram=1,
            remove_subwords=False,
        )
    )

    if trace > 0:
        print(repeated_hypothesis, references, mean_score)

    return mean_score

# Commented out IPython magic to ensure Python compatibility.

In [None]:
# Column lists used by the metric cells below.
# NOTE(review): `abs` shadows Python's builtin abs() for the rest of the
# session; the later metric cells read this name, so renaming it requires
# updating them together.
abs = df['abstract'].to_list()
ots = df['original title'].to_list()
bts = df['best title'].to_list()
ogts = df['generated title before RL'].to_list()
rlts = df['generated title after RL'].to_list()

In [None]:
#BartScore
# BARTScore of each title set (original, before RL, after RL) given the abstracts.
bart_scores = [
    bart_scorer.score(abs, titles, batch_size=1)
    for titles in (ots, ogts, rlts)
]

avg_bart = np.array(bart_scores).mean(axis=1)
for label, value in zip(('abs_ots: ', 'abs_ogts: ', 'abs_rlts: '), avg_bart):
  print(label, value)

In [None]:
#MoverScore
# MoverScore of every title against its own abstract, per title set.
mover_scores = [
    [sentence_score(title, [abstract]) for title, abstract in zip(titles, abs)]
    for titles in (ots, ogts, rlts)
]
avg_mover = np.array(mover_scores).mean(axis=1)
for label, value in zip(('abs_ots: ', 'abs_ogts: ', 'abs_rlts: '), avg_mover):
  print(label, value)

In [None]:
#BertScore
# BERTScore per title set; element [2] of the `bert_score.score` result is kept.
bert_scores = [
    bert_score.score(titles, abs, lang="en")[2].tolist()
    for titles in (ots, ogts, rlts)
]
print(bert_scores)
avg_bert = np.array(bert_scores).mean(axis=1)

for label, value in zip(('abs_ots: ', 'abs_ogts: ', 'abs_rlts: '), avg_bert):
  print(label, value)

In [None]:
# Write the per-sample metric scores, one column per title set. The score lists
# are ordered (original, before RL, after RL), so the third column is the
# "after RL" one; it used to be labelled "before RL" a second time.
metric_columns = ["original title", "generated title before RL", "generated title after RL"]
for metric_file, metric_scores in (
    ("bart_scores.csv", bart_scores),
    ("bert_scores.csv", bert_scores),
    ("mover_scores.csv", mover_scores),
):
  # A fresh frame is written directly so that `df` (the titles table read by
  # the cell that extracts the column lists) is not overwritten.
  pd.DataFrame(np.array(metric_scores).T, columns=metric_columns).to_csv(f"{dir_path}/{metric_file}")

In [None]:
# Hand-entered metric values for one run.
# NOTE(review): presumably [exp(avg BARTScore), avg BERTScore, avg MoverScore]
# copied from the outputs above -- confirm the order and which run they are from.
r1 = [np.exp(-0.654991332689921), 0.8518536269664765, 0.5175741747308048]

In [None]:
# Hand-entered metric values for a second run, in the same order as `r1`.
# NOTE(review): source run not recorded here -- confirm.
r2 = [np.exp(-0.6468217780192693), 0.8508123060067495, 0.5174440166876286]

In [None]:
#r3 = [np.exp(-0.6468217780192693), 0.5174440166876286, 0.8508123060067495]

In [None]:
# L1-normalize each metric column across the two runs (axis=0), so the two
# values of every metric sum to 1 and can be compared as shares.
normalized_metrics = normalize(np.array([r1, r2]), axis=0, norm='l1')
normalized_metrics

In [None]:
# NOTE(review): identical to the previous cell (same inputs, same call); it
# recomputes and displays the same normalized table.
normalized_metrics = normalize(np.array([r1, r2]), axis=0, norm='l1')
normalized_metrics