# Model Evaluation

## Setup

In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q evaluate transformers datasets rouge_score jiwer
!pip install -q git+https://github.com/google-research/bleurt.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m783.3 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for BLEURT (setup.py) ... [?25l[?25hdone


In [None]:
import os
from datasets import load_dataset
from evaluate import evaluator, load
import evaluate
from transformers import AutoModelForSeq2SeqLM, pipeline, AutoTokenizer
import pprint
import torch
import numpy as np

In [None]:
# Read the Hugging Face access token from a text file on the mounted Drive.
with open("/content/drive/MyDrive/colab-notebooks/w266/hf.txt", "r") as f:
    # Strip surrounding whitespace: text files usually end with a newline, which
    # would otherwise become part of the token stored in the environment.
    HF_TOKEN = f.read().strip()

# Expose the token through the environment for libraries that look it up there.
os.environ["HF_TOKEN"] = HF_TOKEN

# Dataset identifier and the columns used as model input / reference summary.
DATASET = "jordanfan/processed_us_congress_117_bills_v3"
# DATASET = "jordanfan/billsum_abstracted_us_congress_117_bills_all"
INPUT_COLUMN = "cur_text"
LABEL_COLUMN = "cleaned_summary"

# Model / tokenizer identifiers and the revision to load.
MODEL = "etav22/pegasus-lora-legalease"
TOKENIZER = "etav22/pegasus-lora-legalease"
REVISION = "main"

# NOTE(review): the label below says "Training" although this notebook's title is
# "Model Evaluation"; left unchanged so the saved cell output still matches.
print(f"Training: {MODEL} | {REVISION} ")
print(f"Dataset: {DATASET} | {INPUT_COLUMN} | {LABEL_COLUMN}")

Training: etav22/pegasus-lora-legalease | main 
Dataset: jordanfan/processed_us_congress_117_bills_v3 | cur_text | cleaned_summary


## Data + Model

In [None]:
# Load the "test" split of the configured dataset.
dataset = load_dataset(DATASET, split="test")

# Values of the "index" column identifying the hand-picked examples.
indeces = [9796, 14613, 4524, 2300]


def _is_hand_picked(example):
    """Return True when the row's "index" value is one of the hand-picked ones."""
    return example["index"] in indeces


# Keep only the hand-picked rows and display the resulting dataset.
examples = dataset.filter(_is_hand_picked)
examples

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/133M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/132M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/78.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/11.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11277 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/3388 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/377 [00:00<?, ? examples/s]

Filter:   0%|          | 0/377 [00:00<?, ? examples/s]

Dataset({
    features: ['Unnamed: 0', 'index', 'id', 'policy_areas', 'cur_summary', 'cur_text', 'title', 'titles_official', 'titles_short', 'sponsor_name', 'sponsor_party', 'sponsor_state', 'cleaned_summary', 'extracted_text', 'extracted_text_375', 'extracted_text_750', 'extracted_text_1000', 'bertsum_extracted_250', 'bertsum_extracted_375', 'bertsum_extracted_375_1000', 'bertsum_extracted_250_1000', 'bertsum_extracted_375_750', 'bertsum_extracted_250_750', 'bertsum_extracted_375_500', 'bertsum_extracted_250_500', 'bertsum_extracted_375_375', 'bertsum_extracted_250_375'],
    num_rows: 4
})

In [None]:
# Write each example's cur_text to "<index>.txt" in the current working directory.
# The encoding is fixed to UTF-8 so the files do not depend on the machine's
# locale default.
for example in examples:
    with open(f"{example['index']}.txt", "w", encoding="utf-8") as f:
        f.write(example["cur_text"])

In [None]:
# Load the seq2seq model at the configured revision.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL, revision=REVISION)
# Load the tokenizer from the TOKENIZER setting (previously MODEL was used here,
# leaving the TOKENIZER constant unused); inputs are capped at 512 tokens.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER, revision=REVISION, model_max_length=512)

config.json:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/224 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.11k [00:00<?, ?B/s]

## Generate the summaries

In [None]:
def encode_decode(example):
    """Generate a summary for a single dataset row.

    The text in ``example[INPUT_COLUMN]`` is tokenized into PyTorch tensors
    (truncated to ``max_length=512``), passed to ``model.generate`` with
    ``num_beams=4`` and ``max_length=128``, and the first returned sequence is
    decoded with special tokens skipped.

    Args:
        example: A mapping that contains the ``INPUT_COLUMN`` key.

    Returns:
        A dict with the single key ``'generated_summary'`` holding the decoded
        text.
    """
    source_text = example[INPUT_COLUMN]
    encoded = tokenizer(source_text, truncation=True, max_length=512, return_tensors="pt")
    generated = model.generate(encoded["input_ids"], num_beams=4, max_length=128)
    first_sequence = generated[0]
    return {'generated_summary': tokenizer.decode(first_sequence, skip_special_tokens=True)}

# Run encode_decode over every selected example; the returned
# 'generated_summary' value is added to each row of the mapped dataset.
summarized = examples.map(encode_decode)

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [None]:
# Display the generated summaries, one per selected example.
summarized['generated_summary']

['Consolidated Appropriations Act, 2022 This bill makes consolidated appropriations for the fiscal year ending September 30, 2022, and for providing emergency assistance for the situation in Ukraine, and for other purposes.',
 "The National Collegiate Athletic Association Accountability Act of 2022 establishes and administers due process requirements for the investigation of any member institution, student athlete enrolled in such member institution, or other individual for any alleged infraction of the covered athletic association's bylaws or failure to meet the conditions and obligations of membership if the matter cannot be resolved without a formal investigation.",
 'This bill reauthorizes the Middle Class Tax Relief and Job Creation Act of 2012 to reauthorize the First Responder Network Authority.',
 'Rehabilitation Innovation Centers Act of 2022.']