In [69]:
import google.generativeai as genai
import pdfplumber
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet

In [70]:
API_KEY = "AIzaSyAmjEFyckxbe0DuTr-nP0up3dCp0I4uPt4"
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

In [71]:
def extract_text(pdf_path):
    """Extracts text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                text += extracted + "\n"
    return text

In [72]:
def save_summary_to_pdf(summary, filename="summary_output.pdf"):
    """Saves AI summary into a nicely formatted PDF file with proper spacing and newlines."""
    doc = SimpleDocTemplate(filename)
    styles = getSampleStyleSheet()

    flow = [Paragraph("AI Legal Document Summary", styles["Title"]), Spacer(1, 20)]

    # Ensure newlines in AI text show properly in PDF
    formatted_summary = summary.replace("\n", "<br/><br/>")
    flow.append(Paragraph(formatted_summary, styles["Normal"]))

    doc.build(flow)
    print(f"📄 Saved: {filename}")



In [73]:
def summarize_document(text):
    prompt = f"""
    Summarize this legal document in plain English.
    Highlight key terms, obligations, payments, penalties, and risks.
    
    Document:
    {text[:5000]}
    """
    return model.generate_content(prompt).text

In [74]:
def extract_entities(text):
    structured_prompt = f"""
    Extract the following structured details from this legal document:

    - Parties involved
    - Effective dates
    - Payment terms
    - Obligations of each party
    - Penalties
    - Risks

    Return the result in JSON format.

    Document:
    {text[:5000]}
    """
    return model.generate_content(structured_prompt).text

In [75]:
def analyze_risks(text):
    risk_prompt = f"""
    Analyze the following legal document and highlight any clauses that might be risky
    for a small business or individual (e.g., heavy penalties, vague obligations, auto-renewal clauses).

    Document:
    {text[:5000]}
    """
    return model.generate_content(risk_prompt).text

In [None]:
def process_documents(pdf_files):
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file}\n{'-'*50}")
        text = extract_text(pdf_file)

        # Summarize
        summary = summarize_document(text)
        print("Summary:\n", summary[:800], "...\n")  # preview
        save_summary_to_pdf(summary, filename=f"{pdf_file}_summary.pdf")

        # Extract structured entities
        entities = extract_entities(text)
        print("Extracted Entities:\n", entities, "\n")

        # Analyze risks
        risks = analyze_risks(text)
        print("Risks:\n", risks, "\n")

In [77]:
pdf_files = ["sample_legal_document.pdf"] 
process_documents(pdf_files)


📄 Processing: sample_legal_document.pdf
--------------------------------------------------
✅ Summary:
 This Non-Disclosure Agreement (NDA) is a contract between Party A and Party B to keep secrets.  **Key Term:** *Confidential Information* includes things like trade secrets, financial info, and technical processes.

**Obligations:** Party B (the receiving party) promises not to share any confidential information with anyone else without Party A's (the disclosing party) written permission.  This obligation lasts for **three years** from the date the information is shared.

**Payments:**  The document doesn't specify any payments.

**Penalties:** If Party B breaks the agreement (a **breach**), Party A can sue them.  They can ask a court to stop Party B from sharing the information (**injunction**) and to pay for any damages caused by the breach.

**Risks:** The main risk for Party A is that t ...

📄 Saved: sample_legal_document.pdf_summary.pdf
📌 Extracted Entities:
 ```json
{
  "Parties