In [4]:
import math
import os
import pprint
import logging

import nltk
import numpy as np
import torch
from tqdm.auto import tqdm

import transformers
from accelerate import Accelerator
from filelock import FileLock
from transformers import AdamW, get_scheduler, set_seed

from transformers.file_utils import is_offline_mode
from transformers.utils.versions import require_version

from args import parse_args
from data_loader import raw_data_loader, data_processor
from model_loader import model_loader
from rouge_s import py_rouge_scores
from scoring import bleu_scores, meteor_scores
from utils import label_smoothed_nll_loss, postprocess_text

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class Args:
    output_dir = "./output/sample"
    train_file = "./data/samsum/train_small.csv"
    validation_file = "./data/samsum/val_small.csv"
    test_file = "./data/samsum/test_small.csv"
    text_column = "dialogue"
    summary_column = "summary"
#     model_name_or_path = "t5-base"
    model_name_or_path = "./output/run_cngen_turns_samsum_bart_base/best"
    model_type = "bart"
    source_prefix = ""
    max_source_length = 1024
    min_target_length = 1
    max_target_length = 128
    learning_rate = 5e-5
    weight_decay = 1e-3
    label_smoothing = 0.1
    length_penalty = 1.0
    num_train_epochs = 4
    per_device_train_batch_size = 1
    gradient_accumulation_steps = 16
    per_device_eval_batch_size = 1
    per_device_test_batch_size = 1
    num_warmup_steps = 0
    cache_dir = "./output/cache"
    overwrite_cache = True
    seed = 12345
    
    ignore_pad_token_for_loss = True
    preprocessing_num_workers = None
    overwrite_cache = None
    num_beams = None
    pad_to_max_length = True
    config_name = None
    tokenizer_name = "t5-base"
    use_slow_tokenizer = True
    max_train_steps = None
    lr_scheduler_type = "linear"
    shuffle = False
    
args=Args()

In [6]:
# =  =  =  =  =  =  =  =  =  = Logging Setup =  =  =  =  =  =  =  =  =  =  =  = 
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

In [7]:
# =  =  =  =  =  =  =  =  =  = Pre-check Package Info =  =  =  =  =  =  =  =  =  =  =  = 
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

try:
    nltk.data.find("tokenizers/punkt")
except (LookupError, OSError):
    if is_offline_mode():
        raise LookupError(
            "Offline mode: run this script without TRANSFORMERS_OFFLINE first to download nltk data files"
        )
    with FileLock(".lock") as lock:
        nltk.download("punkt", quiet=True)

In [8]:
# = = = = = = = = = = = = = Main Process = = = = = = = = = = = = = = = = = =
# Display Parameters
logging.info("*** Parameters ***")
for item, value in vars(args).items():
    logging.info("{}: {}".format(item, value))
logging.info("")

# Initialize the accelerator. The accelerator will handle device placement for us.
accelerator = Accelerator()
logger.info(accelerator.state)

# Setup logging, we only want one process per machine to log things on the screen.
# accelerator.is_local_main_process is only True for one process per machine.
logger.setLevel(logging.INFO if accelerator.is_local_main_process else logging.ERROR)
if accelerator.is_local_main_process:
    #datasets.utils.logging.set_verbosity_warning()
    transformers.utils.logging.set_verbosity_info()
else:
    #datasets.utils.logging.set_verbosity_error()
    transformers.utils.logging.set_verbosity_error()

# If passed along, set the training seed now.
if args.seed is not None:
    set_seed(args.seed)
    torch.backends.cudnn.enabled = False 
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

if accelerator.is_main_process:
    if args.output_dir is not None:
        os.makedirs(args.output_dir, exist_ok=True)
accelerator.wait_for_everyone()

# load raw dataset
raw_datasets = raw_data_loader(args)

# load model (config, tokenizer, s2s model)
config, tokenizer, model = model_loader(accelerator, logger, args)

# data processor (for DataLoader)
dataloader, processed_dataset = data_processor(logger, args, accelerator, raw_datasets, tokenizer, model)
train_dataloader, eval_dataloader, test_dataloader = dataloader
train_dataset, _, _ = processed_dataset

07/15/2022 09:07:17 - INFO - root - *** Parameters ***
07/15/2022 09:07:17 - INFO - root - 
07/15/2022 09:07:17 - INFO - __main__ - Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda
Use FP16 precision: False

loading configuration file ./output/run_cngen_turns_samsum_bart_base/best/config.json
Model config BartConfig {
  "_name_or_path": "./output/run_cngen_turns_samsum_bart_base/best",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder

In [9]:
# = = = Training Preparation = = =
# Split weights in two groups, one with weight decay and the other not.
no_decay = ["bias", "LayerNorm.weight"]

no_decay_emb_matrix = ["bias", "LayerNorm.weight"]

optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay_emb_matrix)],
        "weight_decay": args.weight_decay,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]

# Optimizer
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
model, optimizer, train_dataloader, eval_dataloader, test_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader, test_dataloader
)

# Scheduler and math around the number of training steps.
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
if args.max_train_steps is None:
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
else:
    args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

lr_scheduler = get_scheduler(
    name=args.lr_scheduler_type,
    optimizer=optimizer,
    num_warmup_steps=args.num_warmup_steps,
    num_training_steps=args.max_train_steps,
)

In [7]:
# = = = = = = = = = = = = = = = = Train = = = = = = = = = = = = = = = = = = =
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

logger.info("***** Running training *****")
logger.info(f"  Num examples = {len(train_dataset)}")
logger.info(f"  Num Epochs = {args.num_train_epochs}")
logger.info(f"  Instantaneous batch size per device = {args.per_device_train_batch_size}")
logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
logger.info(f"  Total optimization steps = {args.max_train_steps}")

# Only show the progress bar once on each machine.
progress_bar = tqdm(range(args.max_train_steps), desc="Training: ", disable=not accelerator.is_local_main_process)
completed_steps = 0

val_results = []
acc_losses  = []
best_r2_f1  = None
best_epoch  = 0

if args.model_type == 'bart' or args.model_type == 't5':
    task_specific_params = model.config.task_specific_params
    params = task_specific_params.get('summarization', {})
    params['min_length'] = args.min_target_length
    params['max_length'] = args.max_target_length
    params['length_penalty'] = args.length_penalty
    model.config.update(params)
else:
    raise ValueError('{} model type not implemented'.format(args.model_type))

06/17/2022 14:52:16 - INFO - __main__ - ***** Running training *****
06/17/2022 14:52:16 - INFO - __main__ -   Num examples = 11071
06/17/2022 14:52:16 - INFO - __main__ -   Num Epochs = 4
06/17/2022 14:52:16 - INFO - __main__ -   Instantaneous batch size per device = 1
06/17/2022 14:52:16 - INFO - __main__ -   Total train batch size (w. parallel, distributed & accumulation) = 16
06/17/2022 14:52:16 - INFO - __main__ -   Gradient Accumulation steps = 16
06/17/2022 14:52:16 - INFO - __main__ -   Total optimization steps = 2768
Training:   0%|                                        | 0/2768 [00:00<?, ?it/s]

In [8]:
# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  = EVAL =  =  =  =  =  =  =  =  =  =  =  =  =  =  = 
def evaluate_model(index):

    loop_count = 0
    
    model.eval()
    for step, batch in enumerate(eval_dataloader):

        loop_count += 1

        if loop_count != index:
            continue

        with torch.no_grad():
            inputs = batch["input_ids"]
            mask = batch["attention_mask"]
            input_text = tokenizer.batch_decode(inputs, skip_special_tokens=True)[0] + "\n"

            count = 0
            while True:         
                # batch["input_ids"] is the input string turned into tokens
                # generated_tokens is the predicted sentence
                generated_tokens = accelerator.unwrap_model(model).generate(
                    inputs,
                    attention_mask=mask
                )

                generated_tokens = accelerator.pad_across_processes(
                    generated_tokens, dim=1, pad_index=tokenizer.pad_token_id
                )

                generated_tokens = accelerator.gather(generated_tokens).cpu().numpy()
                dialogue_output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

                if dialogue_output == 'end_of_dialogue' or count > 15:
                    break

                input_text = input_text + dialogue_output + "\n"
                tokenized = tokenizer([input_text], max_length=args.max_source_length, padding='max_length', truncation=True)
                inputs, mask = tokenized["input_ids"], tokenized["attention_mask"]
                inputs, mask = torch.tensor(inputs), torch.tensor(mask)
                inputs = inputs.to(device="cuda:0")
                mask = mask.to(device="cuda:0")

                count += 1
            
            print("========================== Generated Dialogue from Summary ====================== ")
            print(input_text)

            labels = batch["labels"]
            if not args.pad_to_max_length:
                # If we did not pad to max length, we need to pad the labels too
                labels = accelerator.pad_across_processes(batch["labels"], dim=1, pad_index=tokenizer.pad_token_id)

            labels = accelerator.gather(labels).cpu().numpy()

            if args.ignore_pad_token_for_loss:
                # Replace -100 in the labels as we can't decode them.
                labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
            if isinstance(generated_tokens, tuple):
                generated_tokens = generated_tokens[0]

            decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
            
            decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
            
            print("========================== Original Dialogue ================================== ")
            print(decoded_labels[0])

            break

In [16]:
evaluate_model(56)

generate: Summary - Marta is grocery shopping for dinner. She and Nick will make lasagne. Dialogue - Marta: Hi, I'm at the supermarket now to make some shopping for todays dinner. Do you have any wishes?
Nick: Hi Marta, yes, I have a few. I'll make some lasagne.
Marta: Great!

Marta: Hi, I'm at the supermarket now to make some shopping for todays dinner.
Do you have any wishes?
Nick: Hm I don't know.
I haven't eat spaghetti in a while Marta: Oh no, I've got spaghetti yesterday by Patric and the day before too.
Nick: Okay maybe some fish?
Marta: Yeah fish is great, I'll go and search for something Nick: Text me what do you find.
Marta: Actually there is one small fish left and I don't think we will be full from it.
Nick:


In [None]:
model.eval()
val_predict     = []
val_groundtruth = []
for step, batch in enumerate(eval_dataloader):
    with torch.no_grad():
        generated_tokens = accelerator.unwrap_model(model).generate(
            batch["input_ids"],
            attention_mask=batch["attention_mask"]
        )

        generated_tokens = accelerator.pad_across_processes(
            generated_tokens, dim=1, pad_index=tokenizer.pad_token_id
        )
        labels = batch["labels"]
        if not args.pad_to_max_length:
            # If we did not pad to max length, we need to pad the labels too
            labels = accelerator.pad_across_processes(batch["labels"], dim=1, pad_index=tokenizer.pad_token_id)

        generated_tokens = accelerator.gather(generated_tokens).cpu().numpy()
        labels = accelerator.gather(labels).cpu().numpy()

        if args.ignore_pad_token_for_loss:
            # Replace -100 in the labels as we can't decode them.
            labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
        if isinstance(generated_tokens, tuple):
            generated_tokens = generated_tokens[0]
        decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

        val_predict.extend(decoded_preds)
        val_groundtruth.extend(decoded_labels)

logger.info("")
logger.info("Rouge score on val set after epoch {}".format(epoch+1))
eval_results = py_rouge_scores(val_predict, val_groundtruth)
val_results.append(val_results)

In [8]:
val_predict = ['Hi Tom are you busy tomorrow?',
 'No Im not, why?',
 'Can you go to the animal saving thing?.',
 'Why?',
 'I wanna get a dog for my son.',
 'That will make him very happy.',
 'Yeah, we’ve discussed it many times.\nI think hes ready.',
 'That’s good.\nRaising a dog is tough.\nLike a baby ;-)',
 "I'll get him one of those little dogs.",
 "One that won't grow up too big;-)",
 'And poop all over the place',
 'I like to eat chickens everyday?',
 'Oh, yes, I took him there last Monday.\nHe showed me one that he really liked.',
 'cuckooo i am a chicken',
 'the chicken i want to eat is right at home, away in the fridge',
 "I wonder what he'll name it."]

val_predict

['Hi Tom are you busy tomorrow?',
 'No Im not, why?',
 'Can you go to the animal saving thing?.',
 'Why?',
 'I wanna get a dog for my son.',
 'That will make him very happy.',
 'Yeah, we’ve discussed it many times.\nI think hes ready.',
 'That’s good.\nRaising a dog is tough.\nLike a baby ;-)',
 "I'll get him one of those little dogs.",
 "One that won't grow up too big;-)",
 'And poop all over the place',
 'I like to eat chickens everyday?',
 'Oh, yes, I took him there last Monday.\nHe showed me one that he really liked.',
 'cuckooo i am a chicken',
 'the chicken i want to eat is right at home, away in the fridge',
 "I wonder what he'll name it."]

In [9]:
val_groundtruth = ['Hi Tom, are you busy tomorrow’s afternoon?',
 'I’m pretty sure I am.\nWhat’s up?',
 'Can you go with me to the animal shelter?.',
 'What do you want to do?',
 'I want to get a puppy for my son.',
 'That will make him so happy.',
 'Yeah, we’ve discussed it many times.\nI think he’s ready now.',
 'That’s good.\nRaising a dog is a tough issue.\nLike having a baby ;-)',
 "I'll get him one of those little dogs.",
 "One that won't grow up too big;-)",
 'And eat too much;-))',
 'Do you know which one he would like?',
 'Oh, yes, I took him there last Monday.\nHe showed me one that he really liked.',
 'I bet you had to drag him away.',
 'He wanted to take it home right away ;-).',
 "I wonder what he'll name it."]

val_groundtruth

['Hi Tom, are you busy tomorrow’s afternoon?',
 'I’m pretty sure I am.\nWhat’s up?',
 'Can you go with me to the animal shelter?.',
 'What do you want to do?',
 'I want to get a puppy for my son.',
 'That will make him so happy.',
 'Yeah, we’ve discussed it many times.\nI think he’s ready now.',
 'That’s good.\nRaising a dog is a tough issue.\nLike having a baby ;-)',
 "I'll get him one of those little dogs.",
 "One that won't grow up too big;-)",
 'And eat too much;-))',
 'Do you know which one he would like?',
 'Oh, yes, I took him there last Monday.\nHe showed me one that he really liked.',
 'I bet you had to drag him away.',
 'He wanted to take it home right away ;-).',
 "I wonder what he'll name it."]

In [11]:
bleu = bleu_scores(val_groundtruth, val_predict)

06/17/2022 14:52:48 - INFO - root - 
06/17/2022 14:52:48 - INFO - root - 	bleu-1: 0.532	bleu-2: 0.441	bleu-3: 0.391	bleu-4: 0.329
06/17/2022 14:52:48 - INFO - root - 


In [13]:
meteor, scores= meteor_scores(val_groundtruth, val_predict)

06/17/2022 14:52:59 - INFO - root - 
06/17/2022 14:52:59 - INFO - root - 	meteor: 0.540
06/17/2022 14:52:59 - INFO - root - 


In [14]:
scores

[0.5434782608695652,
 0.0,
 0.66167290886392,
 0.0,
 0.6320224719101123,
 0.8066666666666668,
 0.7332282110091743,
 0.6843065693430658,
 0.9990234375,
 0.9985422740524781,
 0.11904761904761905,
 0.06410256410256411,
 0.9998779296875,
 0.06493506493506494,
 0.33223684210526316,
 0.9976851851851852]

In [14]:
from nltk.translate.meteor_score import meteor_score

In [22]:
reference = list([val_groundtruth[0].split()])
# candidate = val_predict[0].split()
candidate = list("Hi Tom, are you busy tomorrow’s?".split())
score = meteor_score(reference, candidate)
print(score)

0.7217391304347825


In [18]:
reference

[['Hi', 'Tom,', 'are', 'you', 'busy', 'tomorrow’s', 'afternoon?']]

In [16]:
candidate

['Hi', 'Tom,', 'are', 'you', 'busy', "tomorrow's", 'afternoon?']

In [32]:
reference = "It is a guide to action which ensures that the military always obeys the commands of the party"
candidate = "It is a guide to action that ensures that the military will forever heed Party commands"

reference = [reference.split()]
candidate = candidate.split()

In [33]:
meteor_score(reference, candidate)

0.6320224719101123

In [60]:
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /home/ubuntu/nltk_data...
[nltk_data]   Unzipping corpora/omw-1.4.zip.


True

In [37]:
from nltk.translate.bleu_score import sentence_bleu

from nltk.translate.bleu_score import SmoothingFunction
smoothie = SmoothingFunction().method4

In [36]:
reference = [['this', 'is', 'small', 'test']]
candidate = ['this', 'is', 'a', 'test']
print(sentence_bleu(reference, candidate, weights=weights))

1.0547686614863434e-154


In [35]:
weights = (1./4., 1./4., 1./4., 1./4.)

In [17]:
m = "hi"
p = 1
r = 2
f = 3.333

In [18]:
print('\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:1.2f}'.format(m, 'P', 100.0 * p, 'R', 100.0 * r, 'F1', 100.0 * f))

	hi:	P: 100.00	R: 200.00	F1: 333.30


In [7]:
print("{:5.2f}".format(43.444))

43.44
