In [None]:
# Installation Library

!pip install transformers datasets sentencepiece rouge 

In [None]:
import torch
import pandas as pd
from rouge import Rouge
from transformers import pipeline
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import BartForConditionalGeneration, BartTokenizer

In [None]:
# Load the BillSum dataset; the log below shows a cached copy being reused.
dataset = load_dataset("billsum")

Using custom data configuration default
Reusing dataset billsum (/root/.cache/huggingface/datasets/billsum/default/3.0.0/d1e95173aed3acb71327864be74ead49b578522e4c7206048b2f2e5351b57959)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Work only with the 'ca_test' split for the rest of the notebook.
test_cases =  dataset['ca_test']

In [None]:
# Take the record at index 1 as a single worked example:
# its bill text is the model input, its summary is the reference.
sample_record = test_cases[1]
case1, sum1 = sample_record['text'], sample_record['summary']

In [None]:
# Checkpoint to evaluate (loaded by name from the model hub).
model_name = "sshleifer/distilbart-xsum-12-6"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Tokenizer and seq2seq model come from the same checkpoint;
# the model weights are moved to the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)

In [None]:
# Tokenize the sample bill into PyTorch tensors (truncation enabled),
# then move the encoded tensors to the same device as the model.
encoded_case = tokenizer(
    case1,
    return_tensors="pt",
    padding="longest",
    truncation=True,
)
batch = encoded_case.to(device)

In [None]:
# Inspect the encoded example (input ids and attention mask on `device`).
batch

{'input_ids': tensor([[   0,  133,   82,  ..., 8257,    9,    2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1]], device='cuda:0')}

In [None]:
# Clamp the requested length ceiling to the number of positions declared in the
# checkpoint's config, so decoding can never ask for more positions than the
# model provides (the original ceiling of 2000 was above that limit).
generation_max_length = min(2000, model.config.max_position_embeddings)

# NOTE(review): min_length=500 forces very long outputs; the sample output below
# degenerates into repetition. Consider lowering it — doing so changes the scores.
summary1 = model.generate(**batch, max_length=generation_max_length, min_length=500)

# Turn the generated token ids back into text (one string per sequence).
systemsummary = tokenizer.batch_decode(summary1, skip_special_tokens=True)

In [None]:
# Pair the reference summary with the single decoded model output
# (batch_decode returned a list, so take its first element).
standard_summary, system_summary = sum1, systemsummary[0]

In [None]:
# Reference summary for the sample case.
standard_summary

'Existing law provides that the Board of Parole Hearings or its successor in interest shall be the state’s parole authority. Existing law requires that a prisoner who is found to be permanently medically incapacitated, as specified, be granted medical parole, if the Board of Parole Hearings determines that the conditions under which the prisoner would be released would not reasonably pose a threat to public safety. Existing law exempts a prisoner sentenced to death, a prisoner sentenced to life without the possibility of parole, and a prisoner who is serving a sentence for which parole is prohibited by initiative statute, from medical parole eligibility.\nExisting law authorizes a court to resentence or recall the sentence of a prisoner if the court finds that the prisoner is terminally ill, as specified, or the prisoner is permanently medically incapacitated, as specified, and, in either case, the conditions under which the prisoner would be released or receive treatment do not pose a

In [None]:
# Model-generated summary for the same sample case.
system_summary

' The US state of California does not enact any new laws, nor does it amend or amend the Penal Code, or any other law, which would apply to any prisoner serving a sentence without possibility of parole, or may be subject to any other such legislation, or that of any such law, or law, that would affect the release of a prisoner who has been convicted of a particular particular offence, or is not eligible for resentence or recall, under certain circumstances, or for a similar offence, under the same number of the same type of criminal offence (or that would be eligible for the same offence (that would be deemed to the same or of any other offence (and that would not be held by the other).. the current law or of a similar person, or of the other, or by the same persons, or to that of a specific person (or of a different person, for the appropriate person, in the same person ( or that or that that that (or other) or that (that or that) or other (that that ( or other) in the other (or perso

In [None]:
# Score the single generated summary against its reference.
rouge = Rouge()
score = rouge.get_scores(system_summary, standard_summary)

# get_scores returns a list here (one entry per pair); take the only entry.
# Rows are selected and labelled by the library's own keys ('r', 'p', 'f')
# instead of by position, so the labels stay correct regardless of the order
# in which the scorer happens to return those keys.
(
    pd.DataFrame(score[0])
    .loc[['r', 'p', 'f']]
    .rename(index={'r': 'recall', 'p': 'precision', 'f': 'f-measure'})
)

Unnamed: 0,rouge-1,rouge-2,rouge-l
recall,0.306122,0.083832,0.255102
precision,0.263158,0.049645,0.219298
f-measure,0.283019,0.062361,0.235849


Test Dataset: generate summaries for the `ca_test` cases

In [None]:
# Pull the two columns of the test split: bill texts (model inputs)
# and their reference summaries.
CasesText, GoldSummary = test_cases['text'], test_cases['summary']

In [None]:
# Sanity check: there must be exactly one reference summary per bill text.
len(CasesText), len(GoldSummary)

(1237, 1237)

In [None]:
# Chunk boundaries for splitting the test set across separate runs
# (presumably one BartSum<k>.csv per chunk; the loop below handles the last one
# — TODO confirm how the first two files were produced):
# CasesText[:500]
# CasesText[500:800]
# CasesText[800:]

In [None]:
SystemSummary = []

# Clamp the requested length ceiling to the number of positions declared in the
# checkpoint's config, so decoding can never ask for more positions than the
# model provides (the original ceiling of 3000 was above that limit).
# Computed once: it does not change between iterations.
generation_max_length = min(3000, model.config.max_position_embeddings)

# Only the slice from index 800 onward is summarized in this run.
# NOTE(review): min_length=500 forces very long outputs; consider lowering it
# (doing so changes the resulting scores).
for i, case in enumerate(CasesText[800:]):

    batch = tokenizer(case, truncation=True, padding="longest", return_tensors="pt").to(device)
    summary = model.generate(**batch, max_length=generation_max_length, min_length=500)
    Summary = tokenizer.batch_decode(summary, skip_special_tokens=True)

    # batch_decode returns a list of strings; it is stored as-is and the first
    # element is extracted in a later cell.
    SystemSummary.append(Summary)
    print(i)  # progress indicator

In [None]:
# Sanity check: one generated entry per reference summary in this slice.
len(SystemSummary), len(GoldSummary[800:])

(437, 437)

In [None]:
# Each stored entry is a one-item list of decoded strings; keep just the string.
SystemSummaryFinal = [decoded[0] for decoded in SystemSummary]

In [None]:
# Build a two-column table pairing each reference summary (from index 800 on)
# with the generated summary at the same position.
paired_rows = list(zip(GoldSummary[800:], SystemSummaryFinal))
Summaries = pd.DataFrame(
    paired_rows,
    columns=['GoldSummary', 'SystemSummary'],
)

In [None]:
# Output folder for per-model summaries.
# NOTE(review): this looks like a mounted Drive path, but no cell in view
# mounts it — confirm the folder exists before running.
path = "/content/drive/MyDrive/LegSuM/Data/Data by Models/"

# Persist this run's gold/system pairs with a header row and without the index.
summaries_file = path + "BartSum3.csv"
Summaries.to_csv(summaries_file, index=False, header=True)

**BART Model ROUGE Scores on the Test Set**

In [None]:
# Read the three saved summary files back from `path`.
# Only BartSum3.csv is written by the cells in view; BartSum1.csv and
# BartSum2.csv must already exist there.
file1, file2, file3 = (
    pd.read_csv(path + csv_name)
    for csv_name in ("BartSum1.csv", "BartSum2.csv", "BartSum3.csv")
)

In [None]:
# Row/column counts of each part; together they should cover the whole test set.
(file1.shape, file2.shape, file3.shape)

((500, 2), (300, 2), (437, 2))

In [None]:
# Stack the three parts into one table. ignore_index=True gives the result a
# fresh 0..n-1 index; without it each part keeps its own 0-based labels and the
# combined frame ends up with duplicate index values.
BartSum = pd.concat([file1, file2, file3], ignore_index=True)

In [None]:
# Spot-check five random rows of the combined table.
BartSum.sample(5)

Unnamed: 0,GoldSummary,SystemSummary
191,"Existing law, the California Fair Employment a...",Here is the full text of the California Bill o...
301,"Under existing law, when a person employed in ...",The State of California is a state of the Uni...
488,Existing law provides for the regulation of lo...,A bill to explore the feasibility of developi...
8,Existing law provides that a judgment that a c...,Here is the full text of the California Penal ...
141,"Under existing law, the Lanterman Developmenta...",The California Department of Welfare and Inst...


In [None]:
# Rebind the names used earlier for the single example: from here on they hold
# whole columns of the combined table, not individual strings.
standard_summary, system_summary = BartSum['GoldSummary'], BartSum['SystemSummary']

In [None]:
# Score every generated summary against its reference; avg=True requests a
# single averaged result, which the next cell loads directly into a DataFrame.
rouge = Rouge()
score = rouge.get_scores(system_summary, standard_summary, avg=True)

In [None]:
# Tabulate the averaged scores: one column per ROUGE variant, one row per statistic.
# Rows are selected and labelled by the library's own keys ('r', 'p', 'f')
# instead of by position, so the labels stay correct regardless of the order
# in which the scorer happens to return those keys.
BartRouge = (
    pd.DataFrame(score)
    .loc[['r', 'p', 'f']]
    .rename(index={'r': 'recall', 'p': 'precision', 'f': 'f-measure'})
)

In [None]:
# Display the averaged ROUGE table.
BartRouge

Unnamed: 0,rouge-1,rouge-2,rouge-l
recall,0.191691,0.058885,0.162104
precision,0.213633,0.04991,0.180725
f-measure,0.193642,0.050964,0.163746


In [None]:
# `path` is rebound here to the scores folder; cells above that read the
# summary CSVs expect the earlier value, so re-run the cell that sets it
# before re-running them.
path = "/content/drive/MyDrive/LegSuM/scores/"

# Save the ROUGE table, keeping the header row (the index is written by default).
scores_file = path + "BartRouge.csv"
BartRouge.to_csv(scores_file, header=True)