In [3]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model = AutoModelForSeq2SeqLM.from_pretrained("AlekseyKulnevich/Pegasus-Summarization")
tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-large')


In [4]:
input_text = '''This agreement entered into on this Date by and between Party A hereinafter referred to as the Seller and Party B hereinafter referred to as the Buyer collectively referred to as the Parties This agreement pertains to the sale and transfer of ownership of the real property commonly known as Property Address including all improvements fixtures and appurtenances therein as legally described in Exhibit A attached hereto The Parties acknowledge that this transaction is subject to all applicable laws rules and regulations governing real property transactions within the jurisdiction where the property is located The Buyer agrees to purchase the property in its current as-is condition with no warranties express or implied as to its condition habitability or suitability for any particular purpose except as otherwise specified in this agreement The purchase price for the property is hereby set at Purchase Price payable as follows Payment Terms The Buyer shall provide a deposit of Deposit Amount upon the signing of this agreement to be held in escrow by a mutually agreed-upon escrow agent with the'''
input_ = tokenizer.batch_encode_plus([input_text], max_length=1024, pad_to_max_length=True, 
                truncation=True, padding='longest', return_tensors='pt')
input_ids = input_['input_ids'] 
input_mask = input_['attention_mask']
summary = model.generate(input_ids=input_ids, 
                         attention_mask=input_mask, 
                         num_beams=32, 
                         min_length=100,
                         no_repeat_ngram_size=2, 
                         early_stopping=True, 
                         num_return_sequences=10)
questions = tokenizer.batch_decode(summary, skip_special_tokens=True)

In [5]:
print(questions)

['This agreement is subject to all applicable laws and regulations governing real property transactions in the jurisdiction where the property is located, including the sale and transfer of ownership to the Buyer in accordance with the terms and conditions set forth in Exhibit A attached hereto. The Buyer agrees to pay a deposit of the purchase price to be held in escrow by a mutually agreed-upon escrow agent for a period of 12 months from the closing date of this agreement to ensure that the transaction is completed in compliance with all relevant laws.', 'This agreement is subject to all applicable laws and regulations governing real property transactions in the jurisdiction where the property is located, including the sale and transfer of ownership to the Buyer in accordance with the terms and conditions set forth in Exhibit A attached hereto. The Buyer agrees to pay a deposit of the purchase price to be held in escrow by a mutually agreed-upon escrow agent for a period of three yea

In [16]:
from transformers import pipeline
import pandas as pd
import re

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Define your conversation
conversation = '''Sugi: I am tired of everything in my life. 
Tommy: What? How happy your life is! I do envy you.
Sugi: You don't know that I have been over-protected by my mother these years. I am really about to leave the family and spread my wings.
Tommy: Maybe you are right.                                           
'''

# Load the legal dictionary from an Excel file
legal_dict = pd.read_excel('DIC.xlsx')

# Convert the legal dictionary into a dictionary for easy lookup
legal_to_civilian = dict(zip(legal_dict['WORD'], legal_dict['MEANING']))

# Function to convert legal text to civilian language
def legal_to_civilian_language(legal_text):
    # Replace legal terms with civilian language equivalents
    for term, civilian_term in legal_to_civilian.items():
        legal_text = re.sub(r'\b{}\b'.format(re.escape(term)), civilian_term, legal_text)

    return legal_text

civ_conversation = legal_to_civilian_language(conversation)


# Use the summarizer to generate a summary
summary = summarizer(civ_conversation)
print(summary)


[{'summary_text': "Sugi: I am tired of everything in my become animated.Tommy: How happy your become animated is! I do envy you. Sugi: You don't understand as position (something) so as to make it balanced, level, or square that I talk at great make or become longer."}]


In [19]:

# Check if data is a list with at least one element
if isinstance(summary, list) and len(summary) > 0:
    # Access the first element (in this case, there's only one element)
    first_element = summary[0]

    # Check if the 'summary_text' key exists in the dictionary and if it's a string
    if 'summary_text' in first_element and isinstance(first_element['summary_text'], str):
        summary_text = first_element['summary_text']
        print(summary_text)
    else:
        print("Value for 'summary_text' not found or not a string")
else:
    print("Data is empty or not a list")

Sugi: I am tired of everything in my become animated.Tommy: How happy your become animated is! I do envy you. Sugi: You don't understand as position (something) so as to make it balanced, level, or square that I talk at great make or become longer.
