In [None]:
import sys
!{sys.executable} -m pip install transformers==4.6.1
!{sys.executable} -m pip install datasets
#!{sys.executable} -m pip install --upgrade torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
#!{sys.executable} -m pip install --upgrade onnxruntime==1.7.0
#!{sys.executable} -m pip install -i https://test.pypi.org/simple/ ort-nightly
#!{sys.executable} -m pip install --upgrade onnxruntime-tools
!{sys.executable} -m pip install sentencepiece
!{sys.executable} -m pip install rouge_score
!pip install progress

Collecting transformers==4.6.1
[?25l  Downloading https://files.pythonhosted.org/packages/d5/43/cfe4ee779bbd6a678ac6a97c5a5cdeb03c35f9eaebbb9720b036680f9a2d/transformers-4.6.1-py3-none-any.whl (2.2MB)
[K     |████████████████████████████████| 2.3MB 30.6MB/s 
Collecting huggingface-hub==0.0.8
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 24.0MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[

In [None]:
import torch
from torch import nn
from torch.quantization import quantize_dynamic
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM)
from progress.bar import Bar

# NOTE(review): presumably disables progress's TTY detection so the bar is still
# drawn in notebook output, which is not a terminal — confirm against the progress package.
Bar.check_tty = False

In [None]:
from contextlib import contextmanager
import time
import string
import warnings
warnings.filterwarnings('ignore')

@contextmanager
def timer(msg):
    """Context manager that prints how long the enclosed block took.

    Prints ``[msg] start.`` on entry and ``[msg] done in <seconds> sec.`` on
    exit. The closing line is printed even when the enclosed block raises;
    the exception then propagates unchanged.

    Args:
        msg: Label placed between the square brackets of both printed lines.
    """
    # perf_counter is a monotonic clock meant for measuring intervals, so the
    # reading cannot be skewed by system clock adjustments.
    t0 = time.perf_counter()
    print(f'[{msg}] start.')
    try:
        yield
    finally:
        # Report the elapsed time on both the normal and the exceptional path.
        elapsed_time = time.perf_counter() - t0
        print(f'[{msg}] done in {elapsed_time} sec.')

In [None]:
import os 
from google.colab import drive
# Mount Google Drive in the Colab VM, then make its root the working directory
# so relative paths in later cells resolve inside the Drive.
drive.mount("/content/drive")
os.chdir("/content/drive/MyDrive")

Mounted at /content/drive


# Loading the Model

In [None]:
# Checkpoint directory on the mounted Drive that holds the model weights to evaluate.
model_ckpt = '/content/drive/MyDrive/GP/pipeline/xsum 16-12/best_tfmr'
# Token budget used both for the model overrides below and for tokenizing the demo input.
max_input_length = 512
# The tokenizer is loaded from the public 'google/pegasus-xsum' hub entry, not from model_ckpt.
tokenizer = AutoTokenizer.from_pretrained('google/pegasus-xsum')
# NOTE(review): extra keyword arguments to from_pretrained are presumably applied as
# config overrides; if so, max_length=512 sets the default *generation* length rather
# than an input limit — confirm this is intended.
# The model is moved to the GPU; later cells hard-code 'cuda' to match.
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt, max_length=max_input_length,
                                                            max_position_embeddings=max_input_length).to('cuda')


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1362.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1912529.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3520083.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=65.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=87.0, style=ProgressStyle(description_w…




In [None]:
# Smoke test: summarise one hand-picked paragraph and time the generate() call.
t_input = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."

# Pad/truncate to exactly max_input_length tokens and return PyTorch tensors.
token = tokenizer(t_input, truncation=True, padding='max_length', max_length=max_input_length, return_tensors="pt")

# Inputs must live on the same device as the model.
input_ids = token["input_ids"].to('cuda')
attention_mask = token["attention_mask"].to('cuda')
# 'set num_beams = 1' for greedy search
with timer('inference time ...'):
  tokens = model.generate(input_ids=input_ids, attention_mask=attention_mask, num_beams=4)

# Only one sequence was submitted, so squeeze() drops the batch dimension before decoding.
output = tokenizer.decode(tokens.squeeze(), skip_special_tokens=True)

print(output)


[inference time ...] start.
[inference time ...] done in 0.5508520603179932 sec.
The Eiffel Tower is the world's tallest free-standing structure.


# Measuring Performance

In [None]:
!pip install nltk
import re

from filelock import FileLock


# Record whether nltk can be imported; the sentence-splitting helper asserts on this flag.
try:
    import nltk

    NLTK_AVAILABLE = True
except (ImportError, ModuleNotFoundError):
    NLTK_AVAILABLE = False

if NLTK_AVAILABLE:
    # Fetch the "punkt" data while holding a file lock on ".lock" in the working directory.
    # NOTE(review): presumably the lock serialises concurrent downloads from several processes — confirm.
    with FileLock(".lock") as lock:
        nltk.download("punkt", quiet=True)


def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Strip the PEGASUS "<n>" marker from ``x`` and put each sentence on its own line.

    This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.

    Args:
        x: Summary text, possibly containing the literal marker "<n>".

    Returns:
        The sentences found by ``nltk.sent_tokenize`` joined with newline characters.

    Raises:
        AssertionError: If nltk could not be imported.
    """
    # re.sub returns a new string; the result must be kept, otherwise "<n>" stays in the text.
    x = re.sub("<n>", "", x)  # remove pegasus newline char
    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)"
    return "\n".join(nltk.sent_tokenize(x))



## ROUGE Function

In [None]:
## ROUGE Utils
from rouge_score import rouge_scorer, scoring
import datasets

import torch
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F

from typing import Callable, Dict, Iterable, List, Tuple, Union

import numpy as np


ROUGE_KEYS = ["rouge1", "rouge2", "rougeL", "rougeLsum"]


def extract_rouge_mid_statistics(dct):
    """Reduce each aggregated score in ``dct`` to its rounded ``mid`` statistics.

    Args:
        dct: Mapping from a key to an object whose ``mid`` attribute exposes
            ``precision``, ``recall`` and ``fmeasure``.

    Returns:
        A new dict with the same keys; each value is a dict holding the three
        statistics rounded to 4 decimal places.
    """
    stat_names = ("precision", "recall", "fmeasure")
    summary = {}
    for rouge_key in dct:
        mid_score = dct[rouge_key].mid
        summary[rouge_key] = dict(
            (name, round(getattr(mid_score, name), 4)) for name in stat_names
        )
    return summary


def calculate_rouge(
    pred_lns: List[str],
    tgt_lns: List[str],
    use_stemmer=True,
    rouge_keys=ROUGE_KEYS,
    return_precision_and_recall=False,
    bootstrap_aggregation=True,
    newline_sep=True,
) -> Dict:
    """Score generated summaries against references with the rouge_score package.

    Args:
        pred_lns: summaries produced by the model.
        tgt_lns: ground-truth summaries, aligned index-by-index with ``pred_lns``
            (pairs are formed with ``zip``, so extra items in the longer list are ignored).
        use_stemmer: passed to ``RougeScorer``; whether the Porter stemmer strips
            word suffixes to improve matching.
        rouge_keys: metrics to compute; defaults to rouge1, rouge2, rougeL, rougeLsum.
        return_precision_and_recall: when aggregating, return precision, recall and
            fmeasure per metric instead of only the scaled fmeasure.
        bootstrap_aggregation: whether to aggregate the per-pair scores with
            ``BootstrapAggregator.aggregate()``. When False the aggregator's raw
            per-pair score store is returned instead.
        newline_sep: whether to put each sentence on its own line before scoring.
            This is essential for rougeLsum on multi-sentence summaries (CNN/DM dataset).

    Returns:
        With aggregation: ``{metric: fmeasure * 100 rounded to 4 decimals}``, or
        ``{metric: {precision, recall, fmeasure}}`` when ``return_precision_and_recall``
        is set. Without aggregation: the aggregator's internal mapping of scores.
    """
    scorer = rouge_scorer.RougeScorer(rouge_keys, use_stemmer=use_stemmer)
    aggregator = scoring.BootstrapAggregator()

    # The reference is passed to scorer.score() first and the model output second.
    for reference, candidate in zip(tgt_lns, pred_lns):
        if newline_sep:
            # rougeLsum expects "\n" separated sentences within a summary
            reference = add_newline_to_end_of_each_sentence(reference)
            candidate = add_newline_to_end_of_each_sentence(candidate)
        aggregator.add_scores(scorer.score(reference, candidate))

    if not bootstrap_aggregation:
        # Raw per-pair scores, read from the aggregator's private store.
        return aggregator._scores

    result = aggregator.aggregate()
    if return_precision_and_recall:
        return extract_rouge_mid_statistics(result)
    return {metric: round(score.mid.fmeasure * 100, 4) for metric, score in result.items()}

## Dataset Preprocessing

In [None]:
class PegasusDataset(torch.utils.data.Dataset):
    """Dataset pairing tokenized source texts with tokenized target texts.

    ``encodings`` supplies the model inputs (every key it contains is passed
    through), and ``labels['input_ids']`` supplies the target token ids.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Number of examples, taken from the length of ``labels['input_ids']``."""
        return len(self.labels["input_ids"])

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with the targets under ``'labels'``."""
        example = {}
        for field, values in self.encodings.items():
            example[field] = torch.tensor(values[idx])
        example['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return example


def prepare_data(model_name,
                 train_texts=None, train_labels=None,
                 val_texts=None, val_labels=None,
                 test_texts=None, test_labels=None):
  """
  Tokenize up to three (texts, labels) splits into PegasusDataset objects.

  The tokenizer is loaded with AutoTokenizer.from_pretrained(model_name). A split
  is built only when both its texts and its labels are given; otherwise its slot
  in the result is None.

  Returns:
    (train_dataset, val_dataset, test_dataset)
  """
  tokenizer = AutoTokenizer.from_pretrained(model_name)

  def build_split(texts, labels):
    # Skip the split unless both halves were supplied.
    if texts is None or labels is None:
      return None
    # Sources and targets are tokenized separately, each padded to its own longest sequence.
    encodings = tokenizer(texts, truncation=True, padding='longest')
    decodings = tokenizer(labels, truncation=True, padding='longest')
    return PegasusDataset(encodings, decodings)

  return (
      build_split(train_texts, train_labels),
      build_split(val_texts, val_labels),
      build_split(test_texts, test_labels),
  )

In [None]:
# Load the XSum dataset through the datasets library.
dataset = datasets.load_dataset('xsum')

#source data
#train_texts, train_labels = dataset['train']['document'][:100000], dataset['train']['summary'][:100000]
#valid_texts, valid_labels = dataset['validation']['document'][:10000], dataset['validation']['summary'][:10000]
# Only the test split is used here: 'document' is the source text, 'summary' the reference.
test_texts, test_labels = dataset['test']['document'], dataset['test']['summary']
# No train/val data is passed, so prepare_data returns None for train_dataset and valid_dataset.
train_dataset, valid_dataset, test_dataset = prepare_data('google/pegasus-xsum',test_texts =test_texts,test_labels=test_labels)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1930.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=954.0, style=ProgressStyle(description_…




Using custom data configuration default


Downloading and preparing dataset xsum/default (download: 245.38 MiB, generated: 507.60 MiB, post-processed: Unknown size, total: 752.98 MiB) to /root/.cache/huggingface/datasets/xsum/default/1.2.0/4957825a982999fbf80bca0b342793b01b2611e021ef589fb7c6250b3577b499...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=254582292.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1001503.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset xsum downloaded and prepared to /root/.cache/huggingface/datasets/xsum/default/1.2.0/4957825a982999fbf80bca0b342793b01b2611e021ef589fb7c6250b3577b499. Subsequent calls will reuse this data.


In [None]:
#del dataset
#gc.collect()
import gc
# Serve the tokenized test split two examples per batch.
test_dataloader = DataLoader(test_dataset,batch_size=2)

## Evaluation Loop

In [None]:
    # Evaluation LOOP
with torch.no_grad():
    model.eval()
    # One tensor per batch, kept on the CPU so the GPU does not have to hold
    # every generated batch for the whole run.
    all_labels = []
    all_preds = []
    bar = Bar("Calculating Rouge Scores ...", max=len(test_dataloader))

    for test_batch in test_dataloader:
        x = {
            'input_ids': test_batch['input_ids'].to('cuda'),
            'attention_mask': test_batch['attention_mask'].to('cuda'),
        }
        prediction = model.generate(**x)
        bar.next()

        # Labels are only decoded, never fed to the model, so they are not moved to the GPU.
        all_labels.append(test_batch['labels'])
        all_preds.append(prediction.cpu())

    bar.finish()

    # Decode every sequence of every batch. Decoding only index 0 of each batch
    # would leave the remaining examples of the batch out of the score.
    preds = [
        text
        for batch in all_preds
        for text in tokenizer.batch_decode(batch, skip_special_tokens=True)
    ]
    lbls = [
        text
        for batch in all_labels
        for text in tokenizer.batch_decode(batch, skip_special_tokens=True)
    ]
    rouge_score = calculate_rouge(pred_lns=preds, tgt_lns=lbls)
    print(rouge_score)

[KCalculating Rouge Scores ... |################################| 5667/5667
[?25h

{'rouge1': 46.3585, 'rouge2': 23.8018, 'rougeL': 38.7079, 'rougeLsum': 38.7154}


In [None]:
def get_response(input_text):
  """Generate output text for a single input string.

  Uses the module-level `tokenizer` and `model`; the tokenized input is moved
  to 'cuda' before generation.

  Args:
    input_text: The text to feed to the model.

  Returns:
    The list produced by tokenizer.batch_decode for the generated sequences,
    with special tokens skipped.
  """
  encoded = tokenizer([input_text], truncation=True, padding='longest', return_tensors="pt")
  generated = model.generate(**encoded.to('cuda'))
  return tokenizer.batch_decode(generated, skip_special_tokens=True)

In [None]:
# Qualitative check: print the generated summary for three sample news passages.
context1 = "The problem is affecting people using the older versions of the PlayStation 3, called the 'Fat' model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year.Sony have also said they are aiming to have the problem fixed shortly but is advising some users to avoid using their console for the time being.'We hope to resolve this problem within the next 24 hours,' a statement reads. 'In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data.'We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system.'The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores."
print(get_response(context1))
context2 = "She will play Denker, a lady’s maid to Dame Maggie Smith’s character, the Dowager Countess of Grantham. Johnston, who has also appeared in Waking the Dead and Coronation Street, joins new stars Richard E Grant and Anna Chancellor, both of whom will play guests of the Granthams at Downton. The hit period drama will return to screens this autumn. Series four of the show, which followed the wealthy Grantham family and their servants, achieved an average of 11.9 million viewers in the UK. The very British drama has also been a huge hit in the US, winning both Emmy Awards and Golden Globes. More than 26 million viewers watched series four on Masterpiece on PBS, making it one of the highest rating shows on American television. Previous high profile guest stars include Shirley Maclaine who played Martha Levinson, Lady Grantham’s mother, and Oscar-nominated actor Paul Giamatti who appeared in last year’s Christmas special as her ”maverick, playboy” son. Series five will also feature 24 star Rade Sherbedgia as a Russian refugee who has fled the revolution after World War 1. Earlier this year, executive producer Gareth Neame promised it would have ”all the usual highs and lows, romance, drama and comedy”."
print(get_response(context2))
context3 = "Media playback is not supported on this device Craig Cathcart put the visitors ahead before substitute Simon Church won and scored an 89th-minute penalty. ”There were lots of positives out of it even if we’d have come off and lost 1-0. They had a good mentality and attitude,” said Coleman. Wales face another Euro 2016 warm-up game against Ukraine in Kiev on Monday. ”We look forward to our next challenge now,” added Coleman. ”The team will change up again, and we’ll see how they go again.” Striker Church, currently on loan at Scottish Premiership side Aberdeen from Reading, was delighted with his equaliser from the spot. ”Northern Ireland were a tough side to play against. They’ve obviously done well to get where they are and it was a tough game,” he said. ”We wanted to do well because it was the last time a Wales crowd would see us before the Euros and we wanted to put in a good performance. ”I’ve just got to keep going now and hopefully score some goals. This is a great squad to be part of.”"
print(get_response(context3))

['Sony have said they are investigating a problem with the PlayStation Network.']
['Downton Abbey has added a new star to its line-up for the fifth series, according to the BBC.']
['Wales manager Chris Coleman praised his side after they came from behind to draw with Northern Ireland.']
