In [None]:
!pip install rouge sumy

In [None]:
import sys
import nltk
import pandas as pd
from rouge import Rouge
from sumy.nlp.stemmers import Stemmer
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.kl import KLSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.sum_basic import SumBasicSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer

In [None]:
# Path to the processed bills CSV. NOTE(review): looks like a Colab Google Drive
# mount — adjust when running elsewhere.
path = "/content/drive/MyDrive/LegSuM/Data/catest_processed.csv"
# Load the CSV into a DataFrame; later cells read its `clean_text` and `summary` columns.
data = pd.read_csv(path)

In [None]:
# Preview five randomly chosen rows (no random_state is set, so the rows differ per run).
data.sample(5)

Unnamed: 0,bill_id,clean_text,summary,sum_len,text_len,cleantext_len
440,AB 163,The people of the State of California do enact...,Existing law authorizes the Commission on Teac...,1666,5803,5710
1037,AB 2172,The people of the State of California do enact...,Existing law authorizes a person to manufactur...,810,5798,5711
116,SB 411,The people of the State of California do enact...,"Under existing law, every person who deters or...",1310,5217,5121
1200,AB 2766,The people of the State of California do enact...,Existing law establishes the Student Aid Commi...,930,9983,9788
292,SB 1072,The people of the State of California do enact...,Existing law requires the county superintenden...,3530,12721,12543


In [None]:
# Download NLTK's "punkt" tokenizer data.
# NOTE(review): presumably required by sumy's Tokenizer — confirm.
nltk.download("punkt")
# Shared ROUGE scorer instance; RougeScore() below reads it as a global.
rouge = Rouge()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Raise the interpreter's recursion limit to 5000, printing the value before and after.
# NOTE(review): presumably needed by the ROUGE computation on long documents — confirm.
print(sys.getrecursionlimit())
sys.setrecursionlimit(5000)
print(sys.getrecursionlimit())

1000
5000


In [None]:
def summarize(text, sumarizer, SENTENCES_COUNT):
    """Build an extractive summary of ``text`` and return it as one string.

    Args:
        text: Plain text to summarize.
        sumarizer: Callable invoked as ``sumarizer(document, SENTENCES_COUNT)``
            that yields the selected sentences.
        SENTENCES_COUNT: Sentence budget forwarded unchanged to ``sumarizer``.

    Returns:
        The selected sentences, each converted with ``str`` and joined by
        single spaces.
    """
    # Parse the raw text into a document using the "en" tokenizer.
    parsed_document = PlaintextParser(text, Tokenizer("en")).document
    picked = sumarizer(parsed_document, SENTENCES_COUNT)
    return " ".join(str(sentence) for sentence in picked)

In [None]:
# Number of sentences each extractive summary may contain.
SENTENCES_COUNT = 5

# Output column -> sumy summarizer class, in the order the columns are added to `data`.
SUMMARIZERS = {
    "LexRankSummary": LexRankSummarizer,
    "KLSummary": KLSummarizer,
    "TextRankSummary": TextRankSummarizer,
    "SumBasicSummary": SumBasicSummarizer,
    "LsaSummary": LsaSummarizer,
}

# Summarize every `clean_text` entry with every model. A fresh summarizer is
# instantiated for each document. `.map` runs eagerly inside the iteration, so the
# lambda always sees the current `summarizer_cls`.
for summary_column, summarizer_cls in SUMMARIZERS.items():
    data[summary_column] = data["clean_text"].map(
        lambda text: summarize(text, summarizer_cls(), SENTENCES_COUNT)
    )

In [None]:
# Reference summary followed by each model's output, for a side-by-side look.
summary_columns = [
    "summary",
    "LexRankSummary",
    "KLSummary",
    "TextRankSummary",
    "SumBasicSummary",
    "LsaSummary",
]
data[summary_columns].sample(5)

Unnamed: 0,summary,LexRankSummary,KLSummary,TextRankSummary,SumBasicSummary,LsaSummary
631,The Lempert-Keene-Seastrand Oil Spill Preventi...,The oil spill prevention and administration fe...,The fee shall be collected by the marine termi...,(1) The oil spill prevention and administratio...,The refinery shall collect the fee from the ow...,The annual assessment shall not exceed six and...
988,Existing law establishes the California Coasta...,"However, a communication made for the purpose ...","A placement agent, as defined in Section 82047...","However, a communication made for the purpose ...",Commission and does not include staff of the c...,For purposes of proceedings before the Califor...
187,Existing law defines specified terms for purpo...,A corporation or other entity with outstanding...,"“Buy” includes exchange, offer to buy, or soli...",“Engage in the business” means the disseminati...,“Borrower” means the loan applicant. The pool ...,“Annual audit” means a certified audit of the ...
812,Existing law requires the adopted course of st...,The people of the State of California do enact...,"Science, including the biological and physical...",Instruction shall provide a foundation for und...,"The adopted course of study for grades 1 to 6,...",The Legislature finds and declares that neithe...
827,Existing law provides for the In-Home Supporti...,"For purposes of this section, “authorized repr...",Specifies the responsibilities to be performed...,"For purposes of this section, “authorized repr...",An individual who is prevented from being a pr...,"For purposes of this section, “authorized repr..."


In [None]:
# Reference summary of the row labelled 5.
data.loc[5, "summary"]

'The Political Reform Act of 1974 provides for the comprehensive regulation of campaign financing and related matters, including the reporting of gifts, as defined. The act prohibits specified officers from receiving gifts in excess of $440 in value from a single source in a calendar year. The act exempts gift payments for the actual costs of specified types of travel that are reasonably related to a legislative or governmental purpose, or to an issue of state, national, or international public policy, from the annual limit on the value of gifts from a single source. This bill would require a nonprofit organization that regularly organizes and hosts travel for elected officials, as specified, and that pays for these types of travel for an elected state officer or local elected officeholder to disclose the names of donors who, in the preceding year, both donated to the nonprofit organization and accompanied an elected officer or officeholder for any portion of the travel, as specified. 

In [None]:
# TextRank summary of the same row (label 5), for comparison with the reference.
data.loc[5, "TextRankSummary"]

'Except as provided in subdivision (b), if income is required to be reported under this article, the statement shall contain all of the following: The name and address of each source of income aggregating one thousand dollars ($1,000) or more in value, or fifty dollars ($50) or more in value if the income was a gift, and a general description of the business activity, if any, of each source. Payments, advances, or reimbursements for travel, including actual transportation and related lodging and subsistence that is reasonably related to a legislative or governmental purpose, or to an issue of state, national, or international public policy, are not prohibited or limited by this chapter if either of the following applies: The travel is in connection with a speech given by the elected state officer, local elected officeholder, candidate for elective state office or local elective office, an individual specified in Section 87200, member of a state board or commission, or designated employ

In [None]:
def RougeScore(ModelScore, ModelSummary, standard_summary=None):
    """Score generated summaries against reference summaries and save the table.

    Args:
        ModelScore: Path (or buffer) the score table is written to as CSV.
        ModelSummary: Generated summaries, one per document.
        standard_summary: Reference summaries aligned with ``ModelSummary``.
            Defaults to the global ``data["summary"]`` column.

    Returns:
        DataFrame with one column per metric returned by the global ``rouge``
        scorer and rows labelled "recall", "precision" and "f-measure".
    """
    if standard_summary is None:
        standard_summary = data["summary"]

    averaged = rouge.get_scores(ModelSummary, standard_summary, avg=True)

    # Label rows by the scorer's stat key rather than by position, so the labels
    # stay correct whatever order the scorer returns "r"/"p"/"f" in.
    stat_labels = {"r": "recall", "p": "precision", "f": "f-measure"}
    scores = pd.DataFrame(averaged).rename(index=stat_labels)

    # Keep the established row order; any unrecognized stat rows go last.
    row_order = [label for label in stat_labels.values() if label in scores.index]
    row_order += [label for label in scores.index if label not in row_order]
    ModelDF = scores.loc[row_order]

    ModelDF.to_csv(ModelScore, index=True, header=True)
    return ModelDF

In [None]:
# Score each model's summaries against data["summary"]. Besides returning the score
# table, every call also writes it as CSV to the path given as the first argument
# (here an extension-less file name, i.e. relative to the current working directory).
LexRouge = RougeScore("LexRouge", data["LexRankSummary"])
KLRouge = RougeScore("KLRouge", data["KLSummary"])
TextRankRouge = RougeScore("TextRankRouge", data["TextRankSummary"])
SumBasicRouge = RougeScore("SumBasicRouge", data["SumBasicSummary"])
LsaRouge = RougeScore("LsaRouge", data["LsaSummary"])

In [None]:
# Output directory for the score CSVs. The trailing slash matters: file names are
# appended to it by plain string concatenation.
score_path = "/content/drive/MyDrive/LegSuM/scores/"

In [None]:
# Persist every score table as <score_path><name>.csv, keeping row labels and header.
rouge_tables = {
    "LexRouge": LexRouge,
    "KLRouge": KLRouge,
    "TextRankRouge": TextRankRouge,
    "SumBasicRouge": SumBasicRouge,
    "LsaRouge": LsaRouge,
}
for table_name, table in rouge_tables.items():
    table.to_csv(score_path + table_name + ".csv", index=True, header=True)

In [None]:
# Display the ROUGE score table for the TextRank summaries.
TextRankRouge

Unnamed: 0,rouge-1,rouge-2,rouge-l
recall,0.396759,0.207309,0.350393
precision,0.367729,0.198354,0.325992
f-measure,0.365713,0.191063,0.323545


In [None]:
# Display the ROUGE score table for the LSA summaries.
LsaRouge

Unnamed: 0,rouge-1,rouge-2,rouge-l
recall,0.278718,0.107787,0.248816
precision,0.391073,0.178288,0.352547
f-measure,0.30846,0.1246,0.276421
