To save in google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


**Packages**

In [2]:
! pip install datasets --quiet
! pip install evaluate --quiet
! pip install rouge_score --quiet
! pip install sacrebleu --quiet
! pip install transformers --quiet
! pip install -q sentencepiece --quiet
! pip install summarizer --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 KB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 KB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 KB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 KB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 KB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 KB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from datasets import load_dataset
import evaluate

from pprint import pprint

**Data**

In [4]:
train_df = pd.read_csv('/content/drive/MyDrive/W266FinalProject/Datasets/xl_sum_sample_train.csv')
val_df = pd.read_csv('/content/drive/MyDrive/W266FinalProject/Datasets/xl_sum_sample_val.csv')
test_df = pd.read_csv('/content/drive/MyDrive/W266FinalProject/Datasets/xl_sum_sample_test.csv')

In [5]:
print(f'train shape: {train_df.shape}')
print(f'val size: {val_df.shape}')
print(f'test size: {test_df.shape}')

train shape: (1000, 2)
val size: (100, 2)
test size: (100, 2)


In [6]:
train_df.head(n=2)

Unnamed: 0,text,summary
0,By Rebecca Ricks & Johnny O'SheaBBC Spotlight ...,"During the spring, at the height of the Covid-..."
1,"By Rachel SchraerBBC Reality Check So, why did...","The parents of five-year-old Tafida Raqeeb, wh..."


**T5 Model**

##### 1.) Load and set up model

In [7]:
from transformers import T5Tokenizer, TFT5ForConditionalGeneration

t5model = TFT5ForConditionalGeneration.from_pretrained("t5-base")
t5tokenizer = T5Tokenizer.from_pretrained("t5-base")

Downloading (…)lve/main/config.json: 0.00B [00:00, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/892M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [8]:
## Set up model params

summary_max_length = 512
target_max_length = 128

##### 3.) Test model

In [9]:
rouge = evaluate.load('rouge')

Downloading builder script: 0.00B [00:00, ?B/s]

In [10]:
chrf = evaluate.load("chrf")

Downloading builder script: 0.00B [00:00, ?B/s]

In [11]:
test_df.shape

(100, 2)

In [12]:
r1 = []
r2 = []
rL = []
rLs = []
chrfs = []

for i in test_df.index:

    T5ARTICLE_TO_SUMMARIZE = test_df['text'][i]

    inputs = t5tokenizer(T5ARTICLE_TO_SUMMARIZE, 
                         max_length=summary_max_length, 
                         truncation=True, 
                         return_tensors="tf")

    summary_ids = t5model.generate(inputs["input_ids"], max_length=target_max_length)

    candidate = t5tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

    ref = [test_df['summary'][i]]

    rouge_results = rouge.compute(predictions=candidate,
                                  references=ref)

    r1.append(rouge_results['rouge1'])
    r2.append(rouge_results['rouge2'])
    rL.append(rouge_results['rougeL'])
    rLs.append(rouge_results['rougeLsum'])
    
    chrf_results = chrf.compute(predictions=candidate,
                                references=ref)
    chrfs.append(chrf_results['score'])

    if i in np.arange(0, 50, 100):
        print(i)

0


In [13]:
print('rouge1 average :', np.mean(r1))
print('rouge2 average :', np.mean(r2))
print('rougeL average :', np.mean(rL))
print('rougeLs average :', np.mean(rLs))
print('chrf average :', np.mean(chrfs))

rouge1 average : 0.19809182836136244
rouge2 average : 0.03253749902763041
rougeL average : 0.13468246215931587
rougeLs average : 0.13468246215931587
chrf average : 26.984854771913675


In [14]:
data = {'rouge1': r1, 'rouge2': r2, 'rogueL': rL, 'rogueLs': rLs, 'chrf': chrfs}

scores = pd.DataFrame(data)

scores.to_csv(r'/content/drive/MyDrive/W266FinalProject/model_results/no_finetuned_T5_scores_samples_maxlength512.csv', index=False)