### Sync repo to your Google Drive account

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write paths
# under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
import os

#!git clone https://github.com/academy-dt/nlp-text-summarisation '/content/drive/MyDrive/MS_DS/NLP/Final project/nlp-text-summarisation'
#os.chdir('/content/drive/MyDrive/MS_DS/NLP/Final project/nlp-text-summarisation')

!git clone https://github.com/academy-dt/nlp-text-summarisation '/content/drive/MyDrive/NLP/nlp-text-summarisation'
os.chdir('/content/drive/MyDrive/NLP/nlp-text-summarisation')

!git submodule init
!git submodule update

In [None]:
# Install the notebook's dependencies into the kernel's environment.
# transformers and torch are pinned to exact versions.
%pip install transformers==2.8.0
%pip install torch==1.4.0
# NOTE(review): rouge is not pinned, so the installed version can differ
# between runs — consider pinning it once a known-good version is confirmed.
%pip install rouge

### Load model

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
import torch

# Run on the GPU when the runtime exposes one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Tokenizer and model are both loaded from the pretrained 't5-small' checkpoint.
model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

# Place the model on the selected device. Kept as the cell's last expression,
# so the notebook still displays the returned module.
model.to(device)

In [None]:
def summarize(text):
    """Generate a summary of ``text`` with the notebook's T5 model.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``
    created in the "Load model" cell.

    Args:
        text: The article to summarise, as a single string. Line breaks and
            other whitespace runs are collapsed to single spaces.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    # Collapse every whitespace run (newlines included) into one space.
    # Deleting newlines outright would glue the last word of a line onto the
    # first word of the next one ("end\nStart" -> "endStart").
    flattened = " ".join(text.split())

    # T5 is a text-to-text model that picks its task from a textual prefix;
    # without "summarize: " the checkpoint is not told to summarise.
    prompt = "summarize: " + flattened

    # NOTE(review): with the pinned transformers 2.8.0, passing max_length is
    # expected to truncate the input to 512 tokens — confirm before upgrading,
    # since newer releases need an explicit truncation argument.
    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512).to(device)

    # Beam search with a bigram repetition block; output limited to 30-100 tokens.
    summary_ids = model.generate(input_ids,
                                 num_beams=4,
                                 no_repeat_ngram_size=2,
                                 min_length=30,
                                 max_length=100,
                                 early_stopping=True)

    # generate() returns a batch; there is a single input, so take row 0.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
import json
from generators import get_cnn_dm_both_generator

# Test shard to read (article, abstract) pairs from.
test_data_path = '/content/drive/MyDrive/test_dataset/test_000.bin'

# One record per test example: source article, reference abstract, T5 summary.
output = []

# Pre-set so the counter is still defined (as 0) if the generator yields nothing.
i = 0
for i, (article, abstract) in enumerate(get_cnn_dm_both_generator(test_data_path), start=1):
    # Progress marker, printed before the (slow) generation step.
    print(f'#{i}')

    t5_abstract = summarize(article)
    record = {
        'article': article,
        'abstract': abstract,
        't5_abstract': t5_abstract,
    }
    output.append(record)

# Persist all records so the evaluation can be repeated without regenerating.
with open('t5_output_.json', 'w') as fout:
    json.dump(output, fout, indent=2)

### ROUGE evaluation

In [None]:
from rouge import Rouge

# Split the collected records into parallel lists: generated summaries
# (hypotheses) and the reference abstracts they are scored against.
summaries = []
abstracts = []
for record in output:
    summaries.append(record['t5_abstract'])
    abstracts.append(record['abstract'])

# Score the summaries against the abstracts and print the result.
rouge = Rouge()
scores = rouge.get_scores(summaries, abstracts)
print(scores)