In [1]:
%load_ext autoreload
%autoreload 2

In [14]:
import wandb
import time
import os
from tqdm import tqdm
import pandas as pd
import transformers
import torch
import numpy as np
import random
import sys
sys.path.insert(1, '/home/ubuntu/RL-storage/rl-simp/trl')

from tqdm import tqdm
from sklearn.utils import shuffle
from transformers import (DebertaForSequenceClassification,
                          Trainer,
                          TrainingArguments,
                          DebertaTokenizerFast,
                          PegasusTokenizer,
                          AutoModelForSeq2SeqLM,
                          AutoTokenizer
                         )
from pattern.en import lexeme
from sentence_transformers import SentenceTransformer, util
from trl.simplification_reward import SimplificationReward
from trl.pegasus import PegasusWithValueHeadModel, respond_to_batch_pegasus
from trl.ppo_v2 import PegasusPPOTrainer
from trl.core import split_batch_encoding, calculate_sari_easse


CUDA_LAUNCH_BLOCKING="1"

# torch_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [3]:
# import nltk
# nltk.download('omw-1.4', download_dir='/home/ubuntu/RL-storage/setup/anaconda3/envs/rl-simp/nltk_data')
# import trl
# trl.__file__
# trl.pegasus.__file__
# transformers.__version__

In [11]:
config = {
    "lm_name": '../saved-models/Bart-paraphrasing/checkpoint-5500',
    # "lm_name": 'tuner007/pegasus_paraphrase',
    # "ref_lm_name": 'tuner007/pegasus_paraphrase',
    "ref_lm_name": '../saved-models/Bart-paraphrasing/checkpoint-5500',
    # "tk_name": "tuner007/pegasus_paraphrase",
    "tk_name": "../saved-models/Bart-paraphrasing/checkpoint-5500",
    "steps": 50000,
    "batch_size": 256,
    "batch_size_test": 40,
    "forward_batch_size": 16,
    "ppo_epochs": 4,   
    "txt_in_len": 58,
    "txt_out_len": 50,
    "lr": 1.41e-5,
    "init_kl_coef":0.2,
    "target": 6,
    "horizon":10000,
    "gamma":1,
    "lam":0.95,
    "cliprange": .2,
    "cliprange_value":.2,
    "vf_coef":.1, 
    "device_ppo": 'cuda:0',
    "device_ref": 'cuda:0',
    "device_reward": 'cuda:0',
    "reward_coef_sup": 0.125,
    "reward_coef_unsup": 10,
    "alpha": 0.8,
    "beta": 1.0,
    "optimizer": "Adam"
}

### Loading Data

In [5]:
asset_valid_path_ref = '../data/asset/dataset/ref-test/asset.test.simp.1'
asset_valid_path_orig = '../data/asset/dataset/asset.test.orig'
wikilarge_train_path_ref = '../data/wikilarge/data-simplification/wikilarge/wiki.full.aner.ori.train.dst'
wikilarge_train_path_orig = '../data/wikilarge/data-simplification/wikilarge/wiki.full.aner.ori.train.src'
newsela_auto_train_path_orig = "../data/newsela-auto/newsela-auto/newsela-auto/ACL2020/train.src"
newsela_auto_train_path_ref = "../data/newsela-auto/newsela-auto/newsela-auto/ACL2020/train.dst"


In [6]:
# orig_wiki_sents = open(wikilarge_train_path_orig).read().split('\n')
# ref_wiki_sents = open(wikilarge_train_path_ref).read().split('\n')
asset_sents_orig = open(asset_valid_path_orig).read().split('\n')
asset_sents_ref = open(asset_valid_path_ref).read().split('\n')

In [7]:
x_newsela = open(newsela_auto_train_path_orig).read().split('\n')
y_newsela = open(newsela_auto_train_path_ref).read().split('\n')
x_newsela, y_newsela = shuffle(x_newsela, y_newsela)

smaple_size = 50000
i = random.randrange(len(x_newsela) - smaple_size)
orig_newsela_auto_sents = x_newsela[i:i+smaple_size]
ref_newsela_auto_sents = y_newsela[i:i+smaple_size]

In [8]:
asset_sents_orig[100], asset_sents_ref[100], len(asset_sents_orig)

('It will then dislodge itself and sink back to the river bed in order to digest its food and wait for its next meal.',
 'It digests its food in the water. It waits for prey in the water.',
 359)

### Loading the Reward Model

In [12]:
# root_comp_simp = "/home/m25dehgh/simplification/complex-classifier"
# model_comp_simp = "newsela-auto-high-quality"
# path_comp_simp = root_comp_simp + '/results' + '/' + model_comp_simp + "/whole-high-quality/checkpoint-44361/"

# path to saved checkpoint of deberata model trained on simple and complex corpora
path_comp_simp = "../saved-models/comp-simp-classifier/checkpoint-44361/"
comp_simp_class_model = DebertaForSequenceClassification.from_pretrained(path_comp_simp).to(config['device_reward'])
comp_simp_class_model.eval()
tokenizer_deberta = DebertaTokenizerFast.from_pretrained('microsoft/deberta-base')
semantic_model = SentenceTransformer('all-mpnet-base-v2', device=config['device_reward'])

In [10]:

input_a = ['below are some useful links to facilitate your involvement.',
          'below are some useful links to facilitate your involvement.',
          'A massive glacier had crashed down the mountain.',]
input_b = ['below are some useful links to make it easier for you to get involved.',
          'below are some useful links to facilitate your involvement.',
          'A massive glacier']

model_reward = SimplificationReward(comp_simp_class_model, tokenizer_deberta, semantic_model, **config)
# model_reward = SimplificationReward(comp_simp_class_model, tokenizer_deberta, comp_simp_class_model, **config)
model_reward.cal_reward(input_a, input_b)

tensor([7.4879, 0.4755, 6.2941], device='cuda:0')

### Loading Pegasus Paraphrasing Model with Value head

In [15]:
pegasus_model = AutoModelForSeq2SeqLM.from_pretrained(config['lm_name']).to(config['device_ppo'])
pegasus_model.eval()

pegasus_model_ref = AutoModelForSeq2SeqLM.from_pretrained(config['ref_lm_name']).to(config['device_ref'])
pegasus_tokenizer = AutoTokenizer.from_pretrained(config['tk_name'])

# pegasus_model = PegasusWithValueHeadModel.from_pretrained(config['lm_name']).to(config['device_ppo'])
# pegasus_model.eval()

# pegasus_model_ref = PegasusWithValueHeadModel.from_pretrained(config['ref_lm_name']).to(config['device_ref'])
# pegasus_tokenizer = PegasusTokenizer.from_pretrained(config['tk_name'])

### W & B Logger

In [16]:
wandb.init(name='run-1-sari', project='simp-rl-Bart', config=config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc


In [10]:
wandb.watch(pegasus_model, log='all')

[<wandb.wandb_torch.TorchGraph at 0x7fead62d6e20>]

### Evaluation

In [8]:
def eval_valdition(model, config, random_batch_indices=None, return_sents=False, test_device=None):
    torch.cuda.empty_cache()
    logs = dict()
    game_data = dict()
    timing = dict()
    input_dict = dict()
    t0 = time.time()
    fbs = config['forward_batch_size']
    
    if test_device == None:
        test_device = config['device_ppo']
    
    if random_batch_indices == None:
        #### get a batch from the dataset
        random_batch_indices = random.choices(list(range(0, len(asset_sents_orig))), 
                                              k=config['batch_size_"test'])
        
    game_data['query'] = [asset_sents_orig[i] for i in random_batch_indices]
    game_data['references'] = [asset_sents_ref[i] for i in random_batch_indices]

    query_tensors = pegasus_tokenizer(game_data['query'], 
                                        truncation=True,
                                        padding='longest',
                                        max_length=config["txt_in_len"], 
                                        return_tensors="pt")

    query_tensors['decoder_input_ids'] = torch.zeros(query_tensors["input_ids"].shape[0], 1, dtype=int)
#     query_tensors.to(config['device_ppo'])

    #### get response from pegasus
    
    t = time.time()
    response_tensors_dict = {
        "input_ids": [],
        "attention_mask": [],
        "decoder_input_ids": [],
    }

    game_data['response'] = []
    for i in tqdm(range(int(query_tensors["input_ids"].shape[0]/fbs))):
        model_input = split_batch_encoding(query_tensors, i*fbs, (i+1)*fbs).to(test_device)
        respond_to_batch_pegasus(model, model_input, txt_len=config['txt_out_len'], no_explr=True)
        response_tensors_dict["decoder_input_ids"].append(model_input['decoder_input_ids'])

        game_data['response'] += [pegasus_tokenizer.decode(response_tensors_dict['decoder_input_ids'][i][j, :]) for j in range(fbs)]

    rewards = calculate_sari_easse(game_data['references'], game_data['response'], game_data['query'])
    sari_scores = torch.tensor(rewards).mean(dim=-1)
    
    if return_sents:
        return sari_scores, game_data # game_data['response']
    
    return sari_scores

def save_model(model, 
               model_number=14, 
               path="/home/m25dehgh/miscellaneous/rl/project/rl-simp/checkpoints/unsp-klD-no-similarity"):
    
    saving_path = path + "/" + str(model_number)
    if not os.path.exists(saving_path):
#         model_number += 1
#         saving_path = path + "/" + str(model_number)
        os.makedirs(saving_path)
        model.save_pretrained(saving_path)

### Training PPO

In [14]:
query_tokens = pegasus_tokenizer(orig_newsela_auto_sents,
                                 truncation=True,
                                 padding='longest',
                                 max_length=config["txt_in_len"],
                                 return_tensors="pt")

In [15]:
ppo_trainer = PegasusPPOTrainer(pegasus_model, pegasus_model_ref, **config)
pegasus_model.eval()
fbs = config['forward_batch_size']
best_score = 0
testing_indices = random.choices(list(range(0, len(asset_sents_orig))), k=config['batch_size_test'])

In [16]:
for epoch in tqdm(range(int(np.ceil(config["steps"]/config['batch_size'])))):
    torch.cuda.empty_cache()
    logs = dict()
    game_data = dict()
    timing = dict()
    input_dict = dict()
    t0 = time.time()

    #### get a batch from the dataset
    random_batch_indices = random.choices(list(range(0, query_tokens['input_ids'].shape[0])), 
                                          k=config['batch_size'])
    game_data['query'] = [orig_newsela_auto_sents[i] for i in random_batch_indices]
    game_data['references'] = [ref_newsela_auto_sents[i] for i in random_batch_indices]
    input_dict['input_ids'] = query_tokens['input_ids'].index_select(0, torch.tensor(random_batch_indices))
    input_dict['attention_mask'] = query_tokens['attention_mask'].index_select(0, torch.tensor(random_batch_indices))
    query_tensors = transformers.tokenization_utils_base.BatchEncoding(input_dict)
    query_tensors['decoder_input_ids'] = torch.zeros(query_tensors["input_ids"].shape[0], 1, dtype=int)
    query_tensors.to(config['device_ppo'])

#     print("game_data['query'] ", game_data['query'])
#     print("input_dict['input_ids'] ", input_dict['input_ids'])
#     print("input_dict['attention_mask'] ", input_dict['attention_mask'])
#     print("query_tensors ", query_tensors)

    #### get response from pegasus
    t = time.time()
    # total_length = config['txt_in_len']+config['txt_out_len']
    response_tensors_dict = {
        "input_ids": [],
        "attention_mask": [],
        "decoder_input_ids": [],
    }

    for i in range(int(config['batch_size']/fbs)):
        model_input = split_batch_encoding(query_tensors, i*fbs, (i+1)*fbs)
#         print("model_input: ", model_input)
        respond_to_batch_pegasus(pegasus_model, model_input, txt_len=config['txt_out_len'], no_explr=False)
        response_tensors_dict["input_ids"].append(model_input['input_ids'])
        response_tensors_dict["attention_mask"].append(model_input['attention_mask'])
        response_tensors_dict["decoder_input_ids"].append(model_input['decoder_input_ids'])

    response_tensors_dict['input_ids'] = torch.cat(response_tensors_dict["input_ids"])
    response_tensors_dict['attention_mask'] = torch.cat(response_tensors_dict["attention_mask"])
    response_tensors_dict['decoder_input_ids'] = torch.cat(response_tensors_dict["decoder_input_ids"])


    game_data['response'] = [pegasus_tokenizer.decode(response_tensors_dict['decoder_input_ids'][i, :]) for i in range(config['batch_size'])]
    timing['time/get_response'] = time.time()-t

#     print("response_tensors_dict ", response_tensors_dict)
    print("game_data['response']:  ", game_data['response'])
#     print("game_data['references']:  ", game_data['references'])
#     print("game_data['query']:  ", game_data['query'])

    ### get simplicity score
    t = time.time()
    
        ### unsupervised rewards
    
#     rewards = []
#     for i in range(int(config['batch_size']/fbs)):
#         res = model_reward.cal_reward(game_data['query'][i*fbs:(i+1)*fbs],
#                                       game_data['response'][i*fbs:(i+1)*fbs]).detach()
#         rewards.append(res)
#     rewards = torch.cat(rewards).to(config['device_ppo'])
    
        ### Supervised rewards (SARI)

    rewards = calculate_sari_easse(game_data['references'], game_data['response'], game_data['query'])[0]
    rewards = torch.tensor(rewards).to(config['device_ppo']) * config['reward_coef_sup']

    timing['time/get_sentiment_preds'] = time.time()-t

    print("rewards: ", rewards)


    #### Run PPO training 
    t = time.time()
    stats = ppo_trainer.step(response_tensors_dict, rewards)
    timing['time/optimization'] = time.time()-t
    
    sari = eval_valdition(pegasus_model, config, random_batch_indices=testing_indices)
    
    if sari[0] > best_score:
        print("Best Sari so far: ", str(sari))
        save_model(pegasus_model, model_number=str(sari[0].item())[:5])
        best_score = sari[0]

    #### Log everything
    timing['time/epoch'] = time.time()-t0
    table_rows = [list(r) for r in zip(game_data['query'], game_data['response'], rewards.cpu().tolist())]
    logs.update({'game_log':wandb.Table(
        columns=['query', 'response', 'reward'],
        rows=table_rows)})
    logs.update(timing)
    logs.update(stats)
    logs['env/reward_mean'] = torch.mean(rewards).cpu().numpy()
    logs['env/reward_std'] = torch.std(rewards).cpu().numpy()
    logs['env/reward_dist'] = rewards.cpu().numpy()
    wandb.log(logs)

  0%|          | 0/6250 [00:00<?, ?it/s]

game_data['response']:   ['There is nothing francophones can do about declining elephant and rhino populations.7-10hz Givenchyalise piesconnoisseur Nyack', "The President described the visit to the first family as'meaningful', despite the seclusion the family has enjoyed concerning their genealogy. Kobayashi' Resumeaka brominated leukaemia resurfacedigining", "The case broughtGRP is a very controversial one which has been going all eMarketer for more than 20 years. Handel Kennard has been performing Desmond's Principles of Sports for Coins for over 13 years speeds up 161", 'parents help find mates for EURUSD4.00 kickstarter funds gainful employment". homeopathic remedyminister Annette McMahon had an appointment to chat with puckers 05:45 Etobicoke.arda ciment east Getty aid.', 'The West was the location in 1969 how Shima had less than Brinkswipe Giveaway in 1700, Western US had more poor Nectars in 2012/13. clearedidy marked trailed. Find Free', "Because of the seriousness of Recognize

  0%|          | 0/6250 [00:05<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 1; 10.76 GiB total capacity; 5.73 GiB already allocated; 128.12 MiB free; 6.15 GiB reserved in total by PyTorch)

In [13]:
test_path = "/home/m25dehgh/miscellaneous/rl/project/rl-simp/checkpoints/unsp-klD-no-similarity/39.52"
pegasus_model_test = PegasusWithValueHeadModel.from_pretrained(test_path).to('cuda:1')
pegasus_model_test.eval()

PegasusWithValueHeadModel(
  (model): PegasusModel(
    (shared): Embedding(96103, 1024, padding_idx=0)
    (encoder): PegasusEncoder(
      (embed_tokens): Embedding(96103, 1024, padding_idx=0)
      (embed_positions): PegasusSinusoidalPositionalEmbedding(60, 1024)
      (layers): ModuleList(
        (0): PegasusEncoderLayer(
          (self_attn): PegasusAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_aff

In [9]:
# scores_active_model = eval_valdition(pegasus_model_test, config, list(range(0, 350)), test_device='cuda:1')
scores_ref_model, game_data = eval_valdition(pegasus_model_ref, config, list(range(0, 352)), return_sents=True, test_device=config['device_ref'])
# scores_active_model, scores_ref_model

100%|██████████| 44/44 [01:59<00:00,  2.71s/it]


In [31]:
scores_ref_model, game_data

(tensor([38.3632,  7.9068, 61.0664, 46.1163]),
 {'query': ['One side of the armed conflicts is composed mainly of the Sudanese military and the Janjaweed, a Sudanese militia group recruited mostly from the Afro-Arab Abbala tribes of the northern Rizeigat region in Sudan.',
   "Jeddah is the principal gateway to Mecca, Islam's holiest city, which able-bodied Muslims are required to visit at least once in their lifetime.",
   'The Great Dark Spot is thought to represent a hole in the methane cloud deck of Neptune.',
   'His next work, Saturday, follows an especially eventful day in the life of a successful neurosurgeon.',
   'The tarantula, the trickster character, spun a black cord and, attaching it to the ball, crawled away fast to the east, pulling on the cord with all his strength.',
   'There he died six weeks later, on 13 January 888.',
   'They are culturally akin to the coastal peoples of Papua New Guinea.',
   'Since 2000, the recipient of the Kate Greenaway Medal has also been 

In [6]:
import torch
CUDA_LAUNCH_BLOCKING="1"
print(torch.zeros(100000, 100).to(torch.device('cuda:2')))

RuntimeError: CUDA error: invalid device ordinal

In [20]:
save_model(pegasus_model)

In [30]:
new_model = PegasusWithValueHeadModel.from_pretrained("/home/m25dehgh/miscellaneous/rl/project/rl-simp/checkpoints")

In [None]:
wandb.init(mode="disabled")

In [26]:
def save_output(file_name, saving_path, sys_sents):
    with open(saving_path + '/' + file_name, "w") as file:
        for i in range(len(sys_sents)):
            file.write(sys_sents[i] + "\n")

In [32]:
path_saving_output = "/home/m25dehgh/miscellaneous/rl/project/rl-simp/outputs/sup-reward-report"
save_output("sys", path_saving_output, game_data['response'])
save_output("ref", path_saving_output, game_data['references'])
save_output("orig", path_saving_output, game_data['query'])

In [4]:
0.2 ** 0

1.0

In [20]:
acu_len_output = 0.
for i in tqdm(range(len(game_data['response']))):
    output_sent = game_data['response'][i]
    len_output = len(output_sent.split())
    acu_len_output += len_output
acu_len_output / len(game_data['response'])

100%|██████████| 352/352 [00:00<00:00, 856377.61it/s]


14.707386363636363

In [11]:
import sys


In [12]:
sys.path.append("/home/m25dehgh/simplification/controllable-simplification")

In [13]:
from src.normalizing import all_norms
from easse.sari import corpus_sari, get_corpus_sari_operation_scores

In [14]:
def calculate_sari_easse(ref_folder_path, sys_sents, orig_file_path):
    orig_sents = open(orig_file_path, encoding='utf-8').read().split('\n')

    orig_sents = orig_sents[:len(sys_sents)]
    orig_sents = all_norms(orig_sents)

    sys_sents = all_norms(sys_sents)

    ref_sents = []

    for i, file_path in enumerate(Path(ref_folder_path).glob("*")):
        f = open(file_path).read().split('\n')
        ref_sent_i = f[:len(sys_sents)]
        ref_sent_i = all_norms(ref_sent_i)
        ref_sents.append(ref_sent_i)

    add, keep, delete = get_corpus_sari_operation_scores(orig_sents=orig_sents, sys_sents=sys_sents,
                                                         refs_sents=ref_sents)
    overal_sari = (add + keep + delete) / 3

    print(f'overal sari:{overal_sari}\
    add: {add}, keep: {keep}, delete: {delete}')

    return {"overall_sari": overal_sari, "addition": add, "keep": keep, "deletion": delete}

In [46]:
sari_scores = []
add_scores = []
delete_scores = []
keep_scores = []

for (ref, sys, orig) in zip(game_data['references'], 
                            game_data['response'], 
                            game_data['query']):

    add, keep, delete = get_corpus_sari_operation_scores(orig_sents=[orig], sys_sents=[sys],
                                                         refs_sents=[[ref]])
    overal_sari = (add + keep + delete) / 3
    sari_scores.append(overal_sari)
    add_scores.append(add)
    delete_scores.append(delete)
    keep_scores.append(keep)

In [59]:
add, keep, delete = get_corpus_sari_operation_scores(sys_sents=game_data['response'], refs_sents=[game_data['references']],
                                orig_sents=game_data['query'])

In [17]:
add, keep, delete = get_corpus_sari_operation_scores(sys_sents=all_norms(game_data['response']), 
                                                     refs_sents=[all_norms(game_data['references'])],
                                                    orig_sents=all_norms(game_data['query']))

In [18]:
add, keep, delete

(7.165090225835084, 53.311433891522974, 63.73646440012346)

In [19]:
(add + keep + delete) / 3

41.40432950582717

In [51]:
all_norms(game_data['references'])[10], game_data['references'][10]

('allesandro `` sandro `` mazzola is an italian former football player .',
 'Allesandro "Sandro" Mazzola is an Italian former football player.')

In [62]:
pegasus_model_ref.config

PegasusConfig {
  "_name_or_path": "tuner007/pegasus_paraphrase",
  "activation_dropout": 0.1,
  "activation_function": "relu",
  "add_bias_logits": false,
  "add_final_layer_norm": true,
  "architectures": [
    "PegasusForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 16,
  "decoder_start_token_id": 0,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 16,
  "eos_token_id": 1,
  "extra_pos_embeddings": 1,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "id2label": {
    "0": "LABEL_0"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0
  },
  "length_penalty": 0.8,
  "max_length": 60,
  "max_position_embeddings": 60,
  "model_type": "pegasus