In [1]:
from datasets import load_dataset
import numpy as np

multi_lexsum = load_dataset("allenai/multi_lexsum", name="v20230518")
modified_dataset = multi_lexsum["test"].filter(lambda x: x["summary/short"] != None)

In [2]:
import spacy
import tiktoken
import torch


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# To get the tokeniser corresponding to a specific model in the OpenAI API:
tokenizer = tiktoken.encoding_for_model("gpt-3.5")

# embedding_model = SentenceTransformer("../models/multi-qa-mpnet-base-dot-v1", device = device).half()

In [3]:
modified_dataset["summary/short"][0]

"The plaintiffs filed a lawsuit on March 8, 2014, alleging that the City of Montgomery, Alabama, improperly imprisoned them for failing to pay traffic fines. They alleged that they did not have an ability to pay the fines due to their financial circumstances and that the city did not consider their ability to pay. On May 1, 2014, the District Court granted the plaintiffs motion for a preliminary injunction, preventing the city from collecting more money from traffic tickets of plaintiffs'. On October 31, 2014 the parties filed to dismiss the case pursuant to a settlement agreement, which included numerous changes to Municipal Court proceedings. The case is now closed."

In [4]:
# user_prompt = "Summarize concisely the following legal texts. Include as many relevant facts as possible. A fact is relevant if it mentions plaintiffs, counsel, type of action, filling date, name of the court, description of class, defendants, statuatory basis, rought remedy, judges, consolidated class, whether it is a class action, date of decree, citations, duration of decrees, last action in case."
system_prompt = "You are a legal expert. You must answer concisely and truthfully, including only information that is relevant to the conversation. Stay faithful to the original text and keep the exact wording as found in the text as closely as possible. Only include facts relevant to the text, without any filler words."

max_context_length = 16384
max_output_length = 130
length_user_prompt = len(tokenizer.encode(user_prompt))
length_system_prompt = len(tokenizer.encode(system_prompt))
print(length_user_prompt)
print(length_system_prompt)

34
60


In [3]:
from openai import OpenAI
from env_utils import load_env_from_file
import json
import os


load_env_from_file(".")

client = OpenAI()

In [6]:
def from_extracted(path, test_size):
    summs = []
    files = os.listdir(path)
    files = sorted(files, key = lambda x: int(x.split(".")[0]))
    for file in files:
        if int(file.split(".")[0]) < test_size:
            docs = json.load(open(path + file, "r"))
            doc = "".join(["".join(doc_sentences[:2]) for doc_sentences in docs])
            summs.append(doc)

    return summs

from_extracted("extracted_sums/extracted_sums_json_first5last5/", 50)

['On top of being unlawfully imprisoned for failure to pay debts owed to the City, Plaintiffs Mitchell and Williams were, pursuant to City policy, coerced with longer unlawful prison terms by City officials if they did not "volunteer" to labor in the City jail under disgusting conditions for an extra credit of $25 per day toward their debts ..It is the policy and practice of the City to tell inmates that their time in City jail can be further reduced if they agree to "work oft" their debts to the City While in jail by laboring at janitorial and other work for the City at a rate of $25 per day toward their debts.\nBased on the subm issions of the parties, the applicable law,\n\nrepresentations made by counsel at the hearing, and the record as a whole, the Court finds\n\nthat Plaintiffs have demonstrated: (1) that Plaintiffs have a substantial likelihood of success\n\non the m erits of its claim that Defendant City of Montgom ery (“the City”) violated their\n\nFourteenth Amendment due pr

In [8]:
from tqdm import tqdm
import pickle

def from_extracted(path, test_size):
    summs = []
    files = os.listdir(path)
    files = sorted(files, key = lambda x: int(x.split(".")[0]))
    for file in files:
        if int(file.split(".")[0]) < test_size:
            docs = json.load(open(path + file, "r"))
            doc = "".join(["".join(doc_sentences[:2]) for doc_sentences in docs])
            summs.append(doc)

    return summs

## CoT summarization
test_size = 100
extract_types = np.asarray(["random_selection", "first5last5", "random_selection_bert", "first5last5_bert"])
prompt_type = "1shot_cot_summarization"
for extract_sum_type in extract_types[[0,2]]:
    responses = []
    basic_responses = []
    path = f"extracted_sums/extracted_sums_json_{extract_sum_type}/"

    user_prompt_summary_basic = "Summarize the text below in 130 words. Let's think about it carefully, considering the importance of each fact in the final summary."\
          + "\n\nSOURCE:{{\n{SOURCE}\n}}\n\nSUMMARY:{{\n{SUMMARY}\n}}\n\nSOURCE:{{\n{SOURCE_Q}\n}}\n\nSUMMARY:"
    user_prompt_revision = "Let's have another look through the summary and source text. Include more important facts, namely: plaintiffs, counsel, taken actions, dates, name of court, defendants, statutory basis, sought remedy, judges, case consolidation, class action, date of decrees, duration of decrees, citations, last action in the case, but within 130 words."
    extracted_summaries = from_extracted(path, test_size=test_size)

    for summ in tqdm(extracted_summaries[1:]):
        basic_prompt = user_prompt_summary_basic.format(SOURCE=extracted_summaries[0], SUMMARY=modified_dataset["summary/short"][0], SOURCE_Q=summ)
        # original summary
        completion = client.chat.completions.create(
            model = "gpt-3.5-turbo-1106",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": basic_prompt}
            ],
            frequency_penalty=0,
            presence_penalty=0,
            top_p=0.2,
            max_tokens=250,
            stop=["SOURCE"]
        )

        basic_summary = completion.choices[0].message.content

        # # get elements from text
        completion = client.chat.completions.create(
            model = "gpt-3.5-turbo-16k",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": basic_prompt},
                {"role": "assistant", "content": basic_summary},
                {"role": "user", "content": user_prompt_revision}
            ],
            frequency_penalty=0,
            presence_penalty=0,
            top_p=0.2,
            max_tokens=250,
            stop=["SOURCE"]
        )

        responses.append(completion.choices[0].message.content)
        basic_responses.append(basic_summary)

    pickle.dump(responses, open(f"{test_size}_predicted_text_{prompt_type}_{extract_sum_type}.pickle", "wb"))
    pickle.dump(basic_responses, open(f"{test_size}_predicted_basic_text_{prompt_type}_{extract_sum_type}.pickle", "wb"))

 74%|███████▍  | 456/615 [50:12<17:30,  6.61s/it] 


BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 18425 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

In [10]:
pickle.dump(responses, open(f"{test_size}_predicted_text_{prompt_type}_{extract_sum_type}.pickle", "wb"))
pickle.dump(basic_responses, open(f"{test_size}_predicted_basic_text_{prompt_type}_{extract_sum_type}.pickle", "wb"))

In [9]:
responses[0]

"The plaintiffs, Harriet Delores Cleveland and Markis Antwuan Watts, filed a class action lawsuit against the City of Montgomery, Alabama. They were represented by attorneys from the Southern Poverty Law Center. The plaintiffs alleged that they were unlawfully imprisoned for failing to pay traffic fines, violating their constitutional rights. On May 1, 2014, the District Court granted a preliminary injunction, preventing the city from collecting more money from the plaintiffs' traffic tickets. The parties filed to dismiss the case on October 31, 2014, pursuant to a settlement agreement. The case was consolidated with another case, and the plaintiffs sought a declaration that the city's judicial procedures complied with constitutional principles. The judges in the Montgomery Municipal Court were named as defendants. The decrees were issued on May 1, 2014, and October 31, 2014. The case is now closed."

In [None]:
# from tqdm import tqdm
# import pickle

# def from_extracted(path, test_size):
#     summs = []
#     files = os.listdir(path)
#     files = sorted(files, key = lambda x: int(x.split(".")[0]))
#     for file in files:
#         if int(file.split(".")[0]) < test_size:
#             docs = json.load(open(path + file, "r"))
#             doc = "".join(["".join(doc_sentences) for doc_sentences in docs])
#             summs.append(doc)

#     return summs

# ## CoT summarization
# test_size = 10
# extract_types = ["random_selection", "first5last5", "random_selection_bert", "first5last5_bert"]
# prompt_type = "cot_summarization"
# for extract_sum_type in extract_types[:1]:
#     responses = []
#     path = f"extracted_sums/extracted_sums_json_{extract_sum_type}/"

#     user_prompt_basic = "Summarize concisely the following legal texts. Follow the wording of the text exactly:\n{{{INPUT}}}"
#     user_prompt_extract_information = """Consider the following text:\n{{{INPUT}}}
# Please extract the most relevant information from the text:
# """
#     user_prompt_new_summary = """SUMMARY:\n{{{SUMMARY}}}
# RELEVANT INFORMATION:\n{{{INFO}}}
# Refine the summary from SUMMARY such that it integrates information from RELEVANT INFORMATION, while using the same wording of the text:"""
#     extracted_summaries = from_extracted(path, test_size=test_size)
#     break

#     for summ in tqdm(extracted_summaries[:3]):
#         # original summary
#         prompt = user_prompt_basic.format(INPUT = summ)
#         completion = client.chat.completions.create(
#             model = "gpt-3.5-turbo-1106",
#             messages=[
#                 {"role": "system", "content": system_prompt},
#                 {"role": "user", "content": prompt}
#             ],
#             frequency_penalty=0,
#             presence_penalty=0,
#             top_p=0.1
#         )
#         basic_summary = completion.choices[0].message.content

#         # # get elements from text
#         prompt = user_prompt_extract_information.format(INPUT = summ)
#         completion = client.chat.completions.create(
#             model = "gpt-3.5-turbo-1106",
#             messages=[
#                 {"role": "system", "content": system_prompt},
#                 {"role": "user", "content": prompt}
#             ],
#             frequency_penalty=0,
#             presence_penalty=0,
#             top_p=0.1
#         )
#         extracted_information = completion.choices[0].message.content

#         # # get final summary
#         # prompt = user_prompt_new_summary.format(SUMMARY = basic_summary, INFO = extracted_information)

#         prompt = "Please integrate the information you extracted into the summary, including as many events as possible. The resulting summary must have a maximum of 130 words. The output should follow this example: \n{EXAMPLE}".format(EXAMPLE = "The plaintiffs filed a lawsuit on March 8, 2014, alleging that the City of Montgomery, Alabama, improperly imprisoned them for failing to pay traffic fines. They alleged that they did not have an ability to pay the fines due to their financial circumstances and that the city did not consider their ability to pay. On May 1, 2014, the District Court granted the plaintiffs motion for a preliminary injunction, preventing the city from collecting more money from traffic tickets of plaintiffs'. On October 31, 2014 the parties filed to dismiss the case pursuant to a settlement agreement, which included numerous changes to Municipal Court proceedings. The case is now closed.")

#         completion = client.chat.completions.create(
#             model = "gpt-3.5-turbo-1106",
#             messages=[
#                 {"role": "system", "content": system_prompt},
#                 {"role": "assistant", "content": basic_summary},
#                 {"role": "assistant", "content": extracted_information},
#                 {"role": "user", "content": prompt}
#             ],
#             frequency_penalty=0,
#             presence_penalty=0,
#             top_p=0.1
#         )

#         responses.append(completion.choices[0].message.content)

#     pickle.dump(responses, open(f"predicted_text_{prompt_type}_{extract_sum_type}.pickle", "wb"))

In [None]:
import evaluate

# ['led: rouge1: 45.89', 'led: rouge2: 23.00', 'led: rougeL: 31.17', 'led: rougeLsum: 32.01']
# ['primera: rouge1: 42.87', 'primera: rouge2: 20.79', 'primera: rougeL: 29.31', 'primera: rougeLsum: 29.79']

rouge_scoring = evaluate.load("rouge")
print(rouge_scoring.compute(predictions=responses, references=modified_dataset[:test_size]["summary/short"], use_stemmer = True))

{'rouge1': 0.3303134287131386, 'rouge2': 0.10961782740750521, 'rougeL': 0.18189682569527046, 'rougeLsum': 0.1878133183123789}


In [None]:
# simple-random: {'rouge1': 0.3669443053395881, 'rouge2': 0.12299475909651572, 'rougeL': 0.2148706258231231, 'rougeLsum': 0.22091825483890215}
# simple-5/5: {'rouge1': 0.3600777579472476, 'rouge2': 0.11723337466920167, 'rougeL': 0.21617088924165473, 'rougeLsum': 0.22142449133012626}
# simple-randombert: {'rouge1': 0.3542897797369077, 'rouge2': 0.10311088709231235, 'rougeL': 0.2069714451531819, 'rougeLsum': 0.20992633649991677}
# simple-5/5bert: {'rouge1': 0.35138148214931236, 'rouge2': 0.104968022054537, 'rougeL': 0.20538624503075653, 'rougeLsum': 0.20893048554674404}

