In [2]:
import pandas as pd
import numpy as np
import textwrap
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import json
import time
from typing import List, Dict, Optional, Any
from openai import OpenAI
from dotenv import load_dotenv
import os

# --- EMBEDDING MODEL CONFIGURATION ---
# Hugging Face model identifier handed to HuggingFaceEmbeddings below.
# NOTE(review): presumably this must be the same model that was used when the
# FAISS index was built — confirm against the index-building notebook.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
K = 3 # Number of top results to retrieve
# Module-level embedding object. load_retriever() reads this global (it is not
# passed as an argument), so this cell has to run before any retriever is loaded.
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_retriever(FAISS_INDEX_PATH, K):
    """
    Open a previously saved FAISS index and expose it as a top-K retriever.

    Args:
        FAISS_INDEX_PATH: Filesystem location of the saved index.
        K: Number of results the returned retriever fetches per query.

    Returns:
        The retriever produced by ``vectorstore.as_retriever`` with
        ``search_kwargs={"k": K}``.

    Raises:
        FileNotFoundError: If nothing exists at ``FAISS_INDEX_PATH``.

    Note:
        The module-level ``embeddings`` object is used to load the index, so it
        must already be defined when this function is called.
    """
    print("\n--- LOADING RAG 1 RETRIEVER ---")

    # Fail fast: the index is expected to be pre-built, so a missing path is fatal.
    index_is_missing = not os.path.exists(FAISS_INDEX_PATH)
    if index_is_missing:
        missing_index_message = (
            f"FAISS index not found at {FAISS_INDEX_PATH}. "
            "Please run 'create_and_save_ideal_index()' first."
        )
        raise FileNotFoundError(missing_index_message)

    # Step 1: rebuild the vector store from disk.
    # SECURITY: LangChain requires this opt-in flag to load a saved index; it
    # permits deserializing the stored data, so only point this at index
    # directories you created yourself.
    load_options = {"allow_dangerous_deserialization": True}
    faiss_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, **load_options)
    print("FAISS index loaded successfully.")

    # Step 2: wrap the store in a retriever limited to K hits.
    search_settings = {"k": K}
    return faiss_store.as_retriever(search_kwargs=search_settings)

In [3]:
# --- RAG 1: Ideal Clauses Configuration ---
IDEAL_CLAUSES_FAISS = "../faiss_index_ideal_clauses"
K = 3

# --- GLOBAL RESOURCE INITIALIZATION (RAG 1) ---
# Best-effort load: start from "no retriever" and only replace it when loading
# succeeds, so later cells can test `ideal_clauses_retriever is None`.
# Relies on `embeddings` and `load_retriever` defined in earlier cells.
ideal_clauses_retriever = None
try:
    ideal_clauses_retriever = load_retriever(IDEAL_CLAUSES_FAISS, K)
except Exception as load_error:
    print(f"CRITICAL: Failed to load ideal clauses RAG index: {load_error}")


--- LOADING RAG 1 RETRIEVER ---
FAISS index loaded successfully.


In [3]:
def format_llm_report(parsed_json: Dict) -> str:
    """
    Render one LLM clause-analysis result as a Markdown report.

    Args:
        parsed_json: Dictionary with the keys ``clause_summary``, ``risk_level``,
            ``feedback`` and ``suggestion``. Missing keys (or keys whose value
            is ``None``) fall back to placeholder text. A payload that is not a
            dictionary at all (e.g. ``None`` after a failed parse) is treated
            as a failed analysis instead of raising.

    Returns:
        A Markdown string. If the payload signals a failure
        (``clause_summary == "Analysis Failed"`` or
        ``feedback == "API Initialization Failed"``) a short error report is
        returned; otherwise the full report with a risk badge, the critique as
        a blockquote, and the suggested action.
    """
    # A missing / malformed payload is reported through the normal failure
    # template rather than crashing on `.get`.
    if not isinstance(parsed_json, dict):
        parsed_json = {"clause_summary": "Analysis Failed"}

    # Check for failure state
    if parsed_json.get("clause_summary") == "Analysis Failed" or parsed_json.get("feedback") == "API Initialization Failed":
        return f"# ❌ Report Generation Failed\n\n**Error:** {parsed_json.get('feedback', 'Unknown error.')}\n\n**Suggestion:** {parsed_json.get('suggestion', 'Please check logs.')}"

    def _text(key: str, default: str) -> str:
        # JSON `null` arrives as None; show the placeholder instead of "None".
        value = parsed_json.get(key)
        return default if value is None else str(value)

    # Extract required fields
    summary = _text("clause_summary", "N/A")
    # Normalise so "high", " High " etc. still receive the correct badge.
    risk = _text("risk_level", "LOW").strip().upper()
    feedback = _text("feedback", "No specific feedback provided.")
    suggestion = _text("suggestion", "No specific suggestion provided.")

    # Determine risk badge; anything other than HIGH / MEDIUM gets the green badge.
    risk_badges = {"HIGH": "🔴", "MEDIUM": "🟠"}
    risk_color = risk_badges.get(risk, "🟢")

    # Prefix every line so multi-line feedback stays inside the blockquote.
    quoted_feedback = "\n".join(
        f"> {line}" for line in (feedback.splitlines() or [""])
    )

    report_lines = [
        # 1. Title and Summary
        f"## Tenancy Agreement Report: {summary}",
        "\n---",
        # 2. Risk Level
        f"### {risk_color} Risk Level: **{risk}**",
        # 3. Detailed Feedback (the core of the critique)
        "\n### 📝 Detailed Critique",
        quoted_feedback,
        # 4. Actionable Suggestion
        "\n### 🛠️ Actionable Suggestion",
        f"**Action:** {suggestion}",
    ]

    return "\n".join(report_lines)

In [4]:
# 1. STORE THE RAW JSON AS A PYTHON STRING
# Hand-written sample payload carrying the four keys format_llm_report reads
# (clause_summary, risk_level, feedback, suggestion). It is kept as text so the
# following cell can exercise json.loads before formatting.
raw_json_string = """
{
  "clause_summary": "This clause attempts to make the tenant responsible for all repairs, including fair wear and tear, and waives the landlord's liability.",
  "risk_level": "HIGH",
  "feedback": "The clause is substantially imbalanced and poses a high risk to the tenant. It attempts to shift the burden of all property repairs (including those caused by normal wear and tear and non-tenant defects) onto the tenant, which is contrary to standard residential tenancy law. Additionally, the broad waiver of liability for the landlord may be unenforceable, but its presence is highly concerning as it discourages the tenant from seeking remedies for issues like negligence.",
  "suggestion": "The user must demand that the clause be revised to explicitly exclude tenant liability for fair wear and tear, structural defects, and major appliance replacement. Consult a legal professional for a review of the liability waiver."
}
"""

In [5]:
# 2. PARSE THE RAW JSON STRING INTO A PYTHON DICTIONARY
# On a decode error the problem is printed and parsed_json_data is set to None
# so the test step below is skipped instead of raising.
try:
    parsed_json_data = json.loads(raw_json_string)
    print("✅ JSON loaded successfully into a Python dictionary.")

except json.JSONDecodeError as e:
    print(f"❌ Error decoding JSON: {e}")
    parsed_json_data = None


# 3. TEST YOUR PYTHON FUNCTION
# Check the type rather than truthiness: an empty object `{}` is still a valid
# payload (the formatter has defaults), while a list or string is not.
if isinstance(parsed_json_data, dict):
    print("\n--- Testing format_llm_report ---")

    # Call the formatter with the parsed dictionary
    markdown_report = format_llm_report(parsed_json_data)

    print("\n\n--- GENERATED MARKDOWN REPORT ---")
    print(markdown_report)
    print("---------------------------------")

✅ JSON loaded successfully into a Python dictionary.

--- Testing format_llm_report_to_markdown ---


--- GENERATED MARKDOWN REPORT ---
## Tenancy Agreement Report: This clause attempts to make the tenant responsible for all repairs, including fair wear and tear, and waives the landlord's liability.

---
### 🔴 Risk Level: **HIGH**

### 📝 Detailed Critique
> The clause is substantially imbalanced and poses a high risk to the tenant. It attempts to shift the burden of all property repairs (including those caused by normal wear and tear and non-tenant defects) onto the tenant, which is contrary to standard residential tenancy law. Additionally, the broad waiver of liability for the landlord may be unenforceable, but its presence is highly concerning as it discourages the tenant from seeking remedies for issues like negligence.

### 🛠️ Actionable Suggestion
**Action:** The user must demand that the clause be revised to explicitly exclude tenant liability for fair wear and tear, structural 

In [6]:
def review_report(review_data: List[Dict], max_context_chars: int = 500) -> str:
    """
    Generates a full Markdown report showing the user clause, RAG context, and
    LLM feedback for every item in the review_data list.

    Args:
        review_data: One dictionary per clause. Each must provide the keys
            ``clause_number``, ``user_clause`` (str), ``comparison_context``
            (iterable of context documents, may be empty/None) and
            ``llm_feedback`` (payload handed to ``format_llm_report``).
            A context document may be an object exposing ``metadata`` /
            ``page_content`` attributes or a plain dict with those keys.
        max_context_chars: Maximum number of characters shown per context
            document; longer content is cut and marked with ``...``.

    Returns:
        The Markdown report as a single string (empty string for no items).

    Raises:
        KeyError: If an item lacks one of the required keys.
    """
    full_report_sections = []

    for item in review_data:
        clause_num = item['clause_number']
        user_clause = item['user_clause']
        context_docs = item['comparison_context']
        llm_output = item['llm_feedback']

        # 1. Start Section for the Clause
        full_report_sections.append(f"# 🔍 Analysis for Clause {clause_num}")
        full_report_sections.append("---")

        # 2. Present the User Clause (The Query)
        full_report_sections.append("## 📜 User Clause (Query)")
        full_report_sections.append(f"```markdown\n{user_clause.strip()}\n```") # Code block for clean display
        full_report_sections.append("\n")

        # 3. Present the Comparison Context (The RAG Retrieval)
        full_report_sections.append("## 📚 Ideal Clause Context (RAG Retrieval)")

        if context_docs:
            for position, doc in enumerate(context_docs, start=1):
                metadata, raw_content = _context_doc_fields(doc)
                # Keep only the file name, whichever path separator the loader recorded.
                source = str(metadata.get('source', 'Unknown')).split('/')[-1].split('\\')[-1]
                page = metadata.get('page_label', 'N/A')
                content = raw_content.strip()

                # Only signal truncation when something was actually cut off.
                if len(content) > max_context_chars:
                    excerpt = content[:max_context_chars] + "..."
                else:
                    excerpt = content

                full_report_sections.append(f"### Context Document {position} (Source: {source}, Page: {page})")
                full_report_sections.append(_as_blockquote(excerpt))
        else:
            full_report_sections.append("*No relevant ideal clauses were found for comparison.*")

        full_report_sections.append("\n---")

        # 4. Present the LLM Feedback (Formatted Output)
        full_report_sections.append("## ✨ LLM Critique and Action Plan")
        full_report_sections.append(format_llm_report(llm_output))

        full_report_sections.append("\n***\n") # Strong separator between clauses

    return "\n".join(full_report_sections)


def _context_doc_fields(doc: Any) -> tuple:
    """Return ``(metadata_dict, page_content_str)`` for a context document given as an object or a dict."""
    if isinstance(doc, dict):
        metadata = doc.get("metadata")
        content = doc.get("page_content")
    else:
        metadata = getattr(doc, "metadata", None)
        content = getattr(doc, "page_content", None)
    return (metadata or {}), str(content or "")


def _as_blockquote(text: str) -> str:
    """Prefix every line of ``text`` with ``> `` so multi-line text stays in one Markdown blockquote."""
    return "\n".join(f"> {line}" for line in (text.splitlines() or [""]))

In [7]:
# 1. Store the raw JSON as a multi-line string
# Simulated review payload: a JSON array with one object per clause. Each object
# carries the keys review_report reads (clause_number, user_clause,
# comparison_context, llm_feedback). Every comparison_context entry is a plain
# JSON object with "metadata" (source, page_label) and "page_content" fields,
# and llm_feedback uses the four keys format_llm_report expects.
SIMULATED_REVIEW_DATA_JSON = """
[
  {
    "clause_number": 1,
    "user_clause": "Disclaimer: This document shall not be modified. Any modifications or oral agreements made by the tenant are void. Landlord may enter the premises with 1 hour notice for any reason.",
    "comparison_context": [
      {"id": "doc-a1b2c3d4", "metadata": {"source": "Ideal_TA_Template_v2.pdf", "page_label": "4"}, "page_content": "Tenant's right to quiet enjoyment is paramount. Landlord must provide minimum 24 hours' written notice for non-emergency entry..."},
      {"id": "doc-e5f6g7h8", "metadata": {"source": "Legal_Code_Section_101.pdf", "page_label": "3"}, "page_content": "A standard form contract clause may not waive the tenant's right to pursue remedies for breach of contract or negligence."}
    ],
    "llm_feedback": {
      "clause_summary": "Clause attempts to void tenant modifications and establish inadequate landlord notice for entry.",
      "risk_level": "HIGH",
      "feedback": "The 1-hour entry notice severely violates the tenant's right to quiet enjoyment and privacy, which is a common statutory protection...",
      "suggestion": "Demand the landlord change the entry clause to a minimum of 24 hours' written notice..."
    }
  },
  {
    "clause_number": 2,
    "user_clause": "The monthly rent is S$3,500.00, payable on the 1st of every month. Late payments will incur a penalty of 10% of the rent, compounded daily.",
    "comparison_context": [
      {"id": "doc-i9j0k1l2", "metadata": {"source": "Ideal_TA_Template_v2.pdf", "page_label": "5"}, "page_content": "Rent is due on the due date. A late fee, if any, must be a reasonable estimate of the administrative costs incurred..."}
    ],
    "llm_feedback": {
      "clause_summary": "Clause addresses monthly rent payment and late payment penalties.",
      "risk_level": "MEDIUM",
      "feedback": "The clause contains an excessive and potentially illegal late fee structure. A 10% compounded daily penalty is not a reasonable reflection of administrative loss...",
      "suggestion": "Negotiate the penalty down to a flat, non-compounding fee (e.g., 1% to 2% of rent, or a fixed amount) and require a 3-5 day grace period..."
    }
  }
]
"""