In [1]:
import google.generativeai as genai
import pdfplumber
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet

In [2]:
API_KEY = "AIzaSyAmjEFyckxbe0DuTr-nP0up3dCp0I4uPt4"
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

In [3]:
def extract_text(pdf_path):
    """Extracts text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                text += extracted + "\n"
    return text

In [4]:
def save_summary_to_pdf(summary, filename="summary_output.pdf"):
    """Saves AI summary into a nicely formatted PDF file with proper spacing and newlines."""
    doc = SimpleDocTemplate(filename)
    styles = getSampleStyleSheet()

    flow = [Paragraph("AI Legal Document Summary", styles["Title"]), Spacer(1, 20)]

    # Ensure newlines in AI text show properly in PDF
    formatted_summary = summary.replace("\n", "<br/><br/>")
    flow.append(Paragraph(formatted_summary, styles["Normal"]))

    doc.build(flow)
    print(f"📄 Saved: {filename}")



In [5]:
def summarize_document(text):
    prompt = f"""
    Summarize this legal document in plain English.
    Highlight key terms, obligations, payments, penalties, and risks.
    
    Document:
    {text[:5000]}
    """
    return model.generate_content(prompt).text

In [6]:
def extract_entities(text):
    structured_prompt = f"""
    Extract the following structured details from this legal document:

    - Parties involved
    - Effective dates
    - Payment terms
    - Obligations of each party
    - Penalties
    - Risks

    Return the result in JSON format.

    Document:
    {text[:5000]}
    """
    return model.generate_content(structured_prompt).text

In [7]:
def analyze_risks(text):
    risk_prompt = f"""
    Analyze the following legal document and highlight any clauses that might be risky
    for a small business or individual (e.g., heavy penalties, vague obligations, auto-renewal clauses).

    Document:
    {text[:5000]}
    """
    return model.generate_content(risk_prompt).text

In [8]:
def process_documents(pdf_files):
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file}\n{'-'*50}")
        text = extract_text(pdf_file)

        # Summarize
        summary = summarize_document(text)
        print("Summary:\n", summary[:800], "...\n")  # preview
        save_summary_to_pdf(summary, filename=f"{pdf_file}_summary.pdf")

        # Extract structured entities
        entities = extract_entities(text)
        print("Extracted Entities:\n", entities, "\n")

        # Analyze risks
        risks = analyze_risks(text)
        print("Risks:\n", risks, "\n")

In [9]:
pdf_files = ["silver_certf.pdf"] 
process_documents(pdf_files)


Processing: silver_certf.pdf
--------------------------------------------------
Summary:
 This health certificate, issued by Dr. Millon Rout on October 3rd, 2024, confirms the good health of a 7-month-old female Persian cat named Silver, owned by Mr. Amritansh Panigrahi of Bhubaneswar, Odisha.

**Key Terms:** Health Certificate, Vaccination Record

**Obligations:**  Dr. Rout's obligation is to truthfully certify the cat's health based on his examination. Mr. Panigrahi's obligation is implied – to ensure the cat's continued health and to adhere to any vaccination schedules.

**Payments:** No payments are mentioned in this document.

**Penalties:** No penalties are specified.

**Risks:** The main risk is the potential for inaccurate information on the certificate, should the cat's health change unexpectedly after the examination.  There's also an implied risk of the cat becoming  ...

📄 Saved: silver_certf.pdf_summary.pdf
Extracted Entities:
 ```json
{
  "Parties involved": [
    "Dr Mi