In [1]:
import glob, os

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

from pathlib import Path
from tqdm import tqdm

import easydict

from finetune import SummarizationTrainer

import warnings

warnings.filterwarnings(action='ignore')

In [2]:
# Hyperparameter namespace passed to SummarizationTrainer in the next cell.
# Keys are listed in the same order as before; values are unchanged.
args = easydict.EasyDict(dict(
    adam_epsilon=1e-08,
    cache_dir='',
    config_name='',
    data_dir='./data/cnn_dm/cnn_dm',
    do_lower_case=False,
    do_predict=False,
    do_train=True,
    eval_batch_size=4,
    fp16=False,
    fp16_opt_level='O1',
    gradient_accumulation_steps=1,
    learning_rate=3e-05,
    max_grad_norm=1.0,
    max_source_length=1024,
    max_target_length=56,
    model_name_or_path='t5-small',
    model_type='t5',
    n_gpu=1,
    n_tpu_cores=0,
    num_train_epochs=3,
    output_dir='./output/ckpt',
    seed=42,
    server_ip='',
    server_port='',
    tokenizer_name='',
    train_batch_size=4,
    warmup_steps=0,
    weight_decay=0.0,
))

In [3]:
# Build the summarization trainer from the hyperparameters in `args`; the log
# output below shows the t5-small configuration being loaded from the local cache.
trainer = SummarizationTrainer(args)

INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json from cache at C:\Users\Daeyoung\.cache\torch\transformers\26561bc9e840d8945f475d0d4c4b9df32025eadd79894b867b570cb1d09e67a9.3817cc1260a6b941b17af62b4f2a942b9825f209d8e2eed99e79e96f85f59aab
INFO:transformers.configuration_utils:Model config T5Config {
  "_num_labels": 2,
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "bad_words_ids": null,
  "bos_token_id": null,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "do_sample": false,
  "dropout_rate": 0.1,
  "early_stopping": false,
  "eos_token_id": 1,
  "finetuning_task": null,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_factor": 1.0,
  "is_decoder": false,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_epsilon": 1e-06,
  "length_penalty": 1.0,
  "max_length": 20,
  "min_length": 0,
 

In [4]:
# Fine-tuned weights: collect the per-epoch checkpoint files found under
# args.output_dir and order them by file name (plain string sort).
checkpoints = glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"), recursive=True)
checkpoints.sort()
checkpoints

['./output/ckpt\\checkpointepoch=0.ckpt',
 './output/ckpt\\checkpointepoch=1.ckpt',
 './output/ckpt\\checkpointepoch=2.ckpt']

In [5]:
# Test articles (one per line) and their reference summaries (one per line).
text_input = './data/cnn_dailymail/test.source'
text_label = './data/cnn_dailymail/test.target'

# Read both files inside context managers so the handles are closed as soon
# as the lines have been loaded; trailing whitespace/newlines are stripped.
with open(text_input, encoding="UTF8") as source_file:
    source_lns = [line.rstrip() for line in source_file]
with open(text_label, encoding="UTF8") as label_file:
    label_lns = [line.rstrip() for line in label_file]

In [6]:
# Restrict the evaluation to the first 1000 article/reference pairs.
source_lns, label_lns = source_lns[:1000], label_lns[:1000]
len(source_lns)

1000

In [7]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of ``n`` items each.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in starts)

In [8]:
# From Not Finetuned Model
device = "cuda"
# Move the model to the same device the input tensors are sent to below,
# instead of repeating the device name as a literal.
trainer.model.to(device)

batch_size = 16
# NOTE: max_length / min_length are not used by the generate() call below,
# which passes the literals 200 and 30.
max_length = 140
min_length = 55

no_ft_output_path = Path('./output/cnn_dm/ex2/no_ft_generated_summary_1000.txt')
# Create the output directory up front so opening the file does not fail
# when the directory does not exist yet.
no_ft_output_path.parent.mkdir(parents=True, exist_ok=True)

# The context manager closes the file even if generation fails part-way.
with no_ft_output_path.open("w", encoding="UTF8") as output_file:
    for batch in tqdm(list(chunks(source_lns, batch_size))):
        # Prefix every article with "summarize: " before tokenizing.
        batch = ["summarize: " + text for text in batch]

        dct = trainer.tokenizer.batch_encode_plus(batch, max_length=512, return_tensors="pt", pad_to_max_length=True)
        input_ids = dct["input_ids"].to(device)
        attention_mask = dct["attention_mask"].to(device)

        summaries = trainer.model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            num_beams=4,
            length_penalty=2.0,
            max_length=200,  # literal; previously written as max_length + 2
            min_length=30,  # literal; previously written as min_length + 1
            no_repeat_ngram_size=3,
            early_stopping=True,
        )
        dec = [trainer.tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries]

        for hypothesis in dec:
            # Keep only the text up to the last '.', dropping whatever follows
            # it; a hypothesis with no '.' at all is written as just ".".
            output_file.write('.'.join(hypothesis.split('.')[:-1]) + '.' + "\n")
        # Flush after each batch so partial results reach disk during the run.
        output_file.flush()

100%|██████████████████████████████████████████████████████████████████████████████████| 63/63 [22:29<00:00, 21.41s/it]


In [10]:
# From finetuned model
for i, ckpt in enumerate(checkpoints):
    # Rebind `trainer` to the model restored from this checkpoint and move it
    # to the device the input tensors are sent to below.
    trainer = SummarizationTrainer.load_from_checkpoint(ckpt)
    trainer.model.to(device)

    ft_output_path = Path(f"./output/cnn_dm/ex2/ft_ckpt_{i}_generated_summary_1000.txt")
    # Create the output directory up front so opening the file does not fail
    # when the directory does not exist yet.
    ft_output_path.parent.mkdir(parents=True, exist_ok=True)

    # One output file per checkpoint; the context manager closes each file
    # before the next checkpoint is processed, even if generation fails.
    with ft_output_path.open("w", encoding="UTF8") as output_file:
        for batch in tqdm(list(chunks(source_lns, batch_size))):
            # Prefix every article with "summarize: " before tokenizing.
            batch = ["summarize: " + text for text in batch]

            dct = trainer.tokenizer.batch_encode_plus(batch, max_length=512, return_tensors="pt", pad_to_max_length=True)
            input_ids = dct["input_ids"].to(device)
            attention_mask = dct["attention_mask"].to(device)

            summaries = trainer.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                num_beams=4,
                length_penalty=2.0,
                max_length=200,  # literal; previously written as max_length + 2
                min_length=30,  # literal; previously written as min_length + 1
                no_repeat_ngram_size=3,
                early_stopping=True,
            )
            dec = [trainer.tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries]

            for hypothesis in dec:
                # Keep only the text up to the last '.', dropping whatever
                # follows it; a hypothesis with no '.' is written as just ".".
                output_file.write('.'.join(hypothesis.split('.')[:-1]) + '.' + "\n")
            # Flush after each batch so partial results reach disk during the run.
            output_file.flush()

INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json from cache at C:\Users\Daeyoung\.cache\torch\transformers\26561bc9e840d8945f475d0d4c4b9df32025eadd79894b867b570cb1d09e67a9.3817cc1260a6b941b17af62b4f2a942b9825f209d8e2eed99e79e96f85f59aab
INFO:transformers.configuration_utils:Model config T5Config {
  "_num_labels": 2,
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "bad_words_ids": null,
  "bos_token_id": null,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "do_sample": false,
  "dropout_rate": 0.1,
  "early_stopping": false,
  "eos_token_id": 1,
  "finetuning_task": null,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_factor": 1.0,
  "is_decoder": false,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_epsilon": 1e-06,
  "length_penalty": 1.0,
  "max_length": 20,
  "min_length": 0,
 

In [15]:
def _prepare_summary(summary):
    # Make sure the summary is not bytes-type
    # Add newlines between sentences so that rougeLsum is computed correctly.
    summary = summary.replace(" . ", " .\n")
    return summary

In [16]:
# calculate rouge score
from rouge_score import rouge_scorer, scoring
from pprint import pprint

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeLsum"], use_stemmer=True)
aggregator = scoring.BootstrapAggregator()

# Summaries generated by the non-fine-tuned model, one per line.
output_path = './output/cnn_dm/ex2/no_ft_generated_summary_1000.txt'
# Read inside a context manager so the file handle is closed after loading.
with open(output_path, encoding="UTF8") as hyp_file:
    output_lns = [line.rstrip() for line in hyp_file]

# Score each (reference, hypothesis) pair; zip stops at the shorter list.
for label_ln, output_ln in zip(label_lns, output_lns):
    label_ln = _prepare_summary(label_ln)
    output_ln = _prepare_summary(output_ln)
    # The reference is passed first, the generated summary second.
    scores = scorer.score(label_ln, output_ln)
    aggregator.add_scores(scores)

# Aggregated low / mid / high scores per ROUGE type (see the output below).
result = aggregator.aggregate()
pprint(result)

{'rouge1': AggregateScore(low=Score(precision=0.409352591920871, recall=0.40067081918885256, fmeasure=0.39144138876204926), mid=Score(precision=0.4192733400835389, recall=0.4099879861134319, fmeasure=0.3999118507037316), high=Score(precision=0.4294267555751109, recall=0.41971588181044006, fmeasure=0.4089158771637931)),
 'rouge2': AggregateScore(low=Score(precision=0.18044589002828093, recall=0.17565312392745236, fmeasure=0.1721202466136278), mid=Score(precision=0.18982395407610692, recall=0.18465497199730305, fmeasure=0.18009563990609367), high=Score(precision=0.19887653841250244, recall=0.19322293397378887, fmeasure=0.1884661918100495)),
 'rougeLsum': AggregateScore(low=Score(precision=0.3755987976136617, recall=0.3678571033556317, fmeasure=0.3596556620059921), mid=Score(precision=0.38494891561002986, recall=0.3769311913500133, fmeasure=0.3676235013079067), high=Score(precision=0.39410544874507497, recall=0.38596860457155174, fmeasure=0.37566750774474994))}


In [18]:
# Score the summaries written for each fine-tuned checkpoint against the same
# references. The scorer is configured identically for every checkpoint, so
# it is built once; the aggregator is rebuilt per checkpoint so that results
# are not mixed across checkpoints.
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeLsum"], use_stemmer=True)

# One generated-summary file exists per entry of `checkpoints` (the generation
# cell enumerates that list), so iterate over the same indices instead of a
# hard-coded count.
for i in range(len(checkpoints)):
    aggregator = scoring.BootstrapAggregator()

    output_path = f"./output/cnn_dm/ex2/ft_ckpt_{i}_generated_summary_1000.txt"
    # Read inside a context manager so the file handle is closed after loading.
    with open(output_path, encoding="UTF8") as hyp_file:
        output_lns = [line.rstrip() for line in hyp_file]

    # Score each (reference, hypothesis) pair; zip stops at the shorter list.
    for label_ln, output_ln in zip(label_lns, output_lns):
        label_ln = _prepare_summary(label_ln)
        output_ln = _prepare_summary(output_ln)
        scores = scorer.score(label_ln, output_ln)
        aggregator.add_scores(scores)

    result = aggregator.aggregate()
    pprint(result)
    print('\n\n')

{'rouge1': AggregateScore(low=Score(precision=0.3522860923679624, recall=0.5274946325174105, fmeasure=0.40070478118485453), mid=Score(precision=0.3606677337415138, recall=0.5377631045324208, fmeasure=0.4079751474421297), high=Score(precision=0.3692666439845269, recall=0.5471598819257698, fmeasure=0.41479850082609837)),
 'rouge2': AggregateScore(low=Score(precision=0.16270789430409696, recall=0.2435395024937354, fmeasure=0.184616031089806), mid=Score(precision=0.17011610879134662, recall=0.2535684263345558, fmeasure=0.19202399116441077), high=Score(precision=0.1781738513148778, recall=0.26312088928664296, fmeasure=0.19954204356849567)),
 'rougeLsum': AggregateScore(low=Score(precision=0.32581192293804256, recall=0.49090424500891167, fmeasure=0.3717175862456484), mid=Score(precision=0.3344184709666654, recall=0.4999317949109042, fmeasure=0.37862584495396123), high=Score(precision=0.3427991024121199, recall=0.5100885255670852, fmeasure=0.3858172456790044))}



{'rouge1': AggregateScore(lo

In [1]:
import files2rouge

In [None]:

# NOTE(review): hyp_path and ref_path are not assigned in any cell visible in
# this notebook — TODO define them (presumably the hypothesis and reference
# file paths; confirm against the files2rouge documentation) before running.
files2rouge.run(hyp_path, ref_path)