<a href="https://colab.research.google.com/github/data4class/Teaching/blob/main/Legal_drafting_examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Open Colab's file-upload dialog so a local file can be copied into the
# runtime's working directory.
# NOTE(review): presumably used to upload `groq_api_key.txt`, which
# generate_ai_document() opens by that name — confirm.
from google.colab import files
files.upload()

In [None]:
!pip install groq reportlab

In [None]:
import re
from datetime import datetime
from IPython.display import display, Markdown
from google.colab import files
from groq import Groq
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
import io

# Template definitions
# Plain-text document templates keyed by document type. A template is filled
# with str.format(); the {date}, {location}, {amount} and {duration} fields are
# the keys produced by extract_parameters(). Square-bracket items such as
# [LENDER NAME] are literal text: nothing in this cell substitutes them.
templates = {
    'loan_agreement': """
BUSINESS LOAN AGREEMENT

This Business Loan Agreement is executed on {date} at {location}, India.

BETWEEN:
1. [LENDER NAME], a company incorporated under the Companies Act, 2013 (the "LENDER")
2. [BORROWER NAME], a company incorporated under the Companies Act, 2013 (the "BORROWER")

1. LOAN AMOUNT: Rs. {amount}
2. INTEREST RATE: [INTEREST RATE]% per annum
3. REPAYMENT PERIOD: {duration}
4. SECURITY: [SECURITY DETAILS]

5. DEFAULT PROVISIONS:
Standard default provisions apply. In case of default, entire amount becomes due.

6. GOVERNING LAW:
This Agreement shall be governed by laws of India. Disputes subject to {location} jurisdiction.

LENDER: _________________    BORROWER: _________________
""",
    'service_agreement': """
SERVICE AGREEMENT

This Service Agreement is executed on {date} at {location}, India.

BETWEEN:
1. [SERVICE PROVIDER] (the "Provider")
2. [CLIENT] (the "Client")

1. SERVICES: [SERVICE DESCRIPTION]
2. PAYMENT: Rs. {amount}
3. DURATION: {duration}
4. DELIVERABLES: [TO BE SPECIFIED]

5. STANDARD TERMS:
- Confidentiality provisions apply
- Standard termination clauses
- Governing law: India

PROVIDER: _________________    CLIENT: _________________
"""
}

def extract_parameters(user_input: str) -> dict:
    """Extract document parameters from a free-text request.

    Matching is case-insensitive and purely pattern based.

    Args:
        user_input: The user's description of the document they need.

    Returns:
        A dict with the keys:
        - 'document_type': 'service_agreement' if the text mentions
          "service" or "consulting", otherwise 'loan_agreement'.
        - 'amount': the number following a currency marker (rs, rs.,
          rupee(s), inr, ₹), including a trailing unit such as "lakhs" or
          "crore" when present; '[AMOUNT]' if none is found.
        - 'duration': e.g. "3 years" or "1 month"; '[DURATION]' if none
          is found.
        - 'location': the first known city found in the text, title-cased;
          'Mumbai' if none is found.
        - 'date': today's date formatted like "January 05, 2025".
    """
    params = {
        'document_type': 'loan_agreement',  # default
        'amount': '[AMOUNT]',
        'duration': '[DURATION]',
        'location': 'Mumbai',
        'date': datetime.now().strftime("%B %d, %Y")
    }

    input_lower = user_input.lower()

    # Document type detection
    if any(word in input_lower for word in ('service', 'consulting')):
        params['document_type'] = 'service_agreement'

    # Amount extraction.
    # - \b keeps the "rs" inside words such as "years" from being read as a
    #   currency marker ("2 years 6 months" must not yield amount "6").
    # - The number must start and end with a digit, so a trailing comma
    #   ("50,000, payable") is not captured.
    # - The optional unit is kept: dropping "lakhs" from "Rs. 50 lakhs"
    #   would change the amount by five orders of magnitude.
    amount_match = re.search(
        r'(?:\brs\.?|\brupees?|\binr|₹)\s*'
        r'(\d+(?:,\d+)*(?:\.\d+)?)'
        r'(?:\s*(lakhs?|lacs?|crores?|thousand|million)\b)?',
        input_lower,
    )
    if amount_match:
        number, unit = amount_match.groups()
        params['amount'] = f"{number} {unit}" if unit else number

    # Duration extraction. The trailing \b stops "5000 monthly" from being
    # read as a 5000-month duration; "[\s-]*" also accepts "3-year".
    duration_match = re.search(r'(\d+)[\s-]*(year|month)s?\b', input_lower)
    if duration_match:
        count, unit = duration_match.groups()
        plural = '' if int(count) == 1 else 's'
        params['duration'] = f"{count} {unit}{plural}"

    # Location extraction: the first city in this list that appears in the text
    cities = ('mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune', 'hyderabad')
    found_city = next((city for city in cities if city in input_lower), None)
    if found_city is not None:
        params['location'] = found_city.title()

    return params

def generate_template_document(user_input: str) -> str:
    """Fill the template that matches the request with its extracted fields.

    The document type and field values come from extract_parameters(); a
    document type with no entry in ``templates`` uses the loan agreement
    template.
    """
    fields = extract_parameters(user_input)
    default_template = templates['loan_agreement']
    chosen_template = templates.get(fields['document_type'], default_template)
    return chosen_template.format(**fields)

def generate_ai_document(user_input: str) -> str:
    """Draft a document for the request through the Groq chat API.

    The API key is read from ``groq_api_key.txt`` in the working directory.
    If the key cannot be read or the client cannot be created, the canned
    text from fallback_ai_simulation() is returned instead. If the API call
    itself fails, the returned string holds the error message followed by
    that same canned text.
    """
    try:
        with open('groq_api_key.txt', 'r') as key_file:
            stored_key = key_file.read().strip()
        groq_client = Groq(api_key=stored_key)
    except Exception:
        # No usable key/client: silently use the simulated output.
        return fallback_ai_simulation(user_input)

    system_message = {
        "role": "system",
        "content": "You are an expert in Indian commercial law and legal document drafting.",
    }

    try:
        request_text = f"""
You are a legal assistant specializing in Indian commercial law. Based on the following request, generate a professional legal document following Indian legal conventions:

Request: "{user_input}"

Requirements:
1. Use proper Indian legal document structure
2. Include relevant Indian law references
3. Use formal legal language appropriate for Indian courts
4. Include proper execution clauses
5. Make it contextually appropriate for the specific request

Generate a complete, professionally formatted document.
"""
        completion = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[system_message, {"role": "user", "content": request_text}],
            max_tokens=1500,
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as err:
        # The error text is returned as part of the result, not raised.
        return f"API Error: {str(err)}\n\nFalling back to AI simulation:\n\n{fallback_ai_simulation(user_input)}"

def fallback_ai_simulation(user_input: str) -> str:
    """Return fixed sample text that stands in for an AI-generated draft.

    The request is lower-cased and checked for keywords, in this order:
    "startup"/"fintech" selects the startup loan text, "service"/"consulting"
    selects the service agreement text, and anything else gets the generic
    loan text. The returned text does not otherwise depend on the request.
    """
    request = user_input.lower()

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the request.
        return any(keyword in request for keyword in keywords)

    if mentions('startup', 'fintech'):
        return """
BUSINESS LOAN AGREEMENT (AI-GENERATED - STARTUP CONTEXT)

This Business Loan Agreement is executed considering the startup nature of the borrower and associated risks.

BETWEEN:
1. [LENDER] - Professional lending institution
2. [BORROWER] - Startup company with innovative business model

WHEREAS the Borrower operates in a high-growth, technology-driven sector with inherent risks and opportunities;

1. LOAN TERMS:
   - Amount: As specified in request
   - Interest: Market rate + startup risk premium
   - Tenure: Flexible repayment aligned with business cash flows

2. STARTUP-SPECIFIC PROVISIONS:
   - Milestone-based disbursements
   - Equity conversion options if applicable
   - Relaxed collateral requirements considering IP assets

3. ENHANCED RISK PROVISIONS:
   - Regular financial reporting requirements
   - Board observer rights for lender
   - First right of refusal on future funding rounds

4. REGULATORY COMPLIANCE:
   - Compliance with RBI guidelines for startup lending
   - DPIIT recognition requirements if applicable
   - Sector-specific regulatory adherence

5. EXIT PROVISIONS:
   - Prepayment options without penalty
   - Conversion to equity upon trigger events
   - Acceleration clauses adapted for startup scenarios

This agreement reflects the unique aspects of startup lending in the Indian ecosystem.
"""

    if mentions('service', 'consulting'):
        return """
PROFESSIONAL SERVICE AGREEMENT (AI-GENERATED)

This Agreement recognizes the professional service nature and includes appropriate IP and deliverable provisions.

PARTIES:
Service Provider and Client as identified in the request

SCOPE OF WORK:
- Detailed service specifications based on professional standards
- Clear deliverable milestones and timelines
- Quality benchmarks and acceptance criteria

INTELLECTUAL PROPERTY:
- Work product ownership clearly defined
- Pre-existing IP protection
- Derivative work rights allocation

PAYMENT AND BILLING:
- Professional fee structure
- Milestone-based payments
- Expense reimbursement terms

PROFESSIONAL STANDARDS:
- Industry best practices compliance
- Confidentiality and non-disclosure
- Professional liability limitations

TERMINATION AND TRANSITION:
- Professional handover requirements
- Knowledge transfer obligations
- Post-termination restrictions
"""

    return """
BUSINESS LOAN AGREEMENT (AI-GENERATED)

This Agreement has been contextually generated based on the specific requirements mentioned.

KEY AI ENHANCEMENTS:
- Analyzed input for business context and risk factors
- Selected appropriate legal framework for the transaction
- Included relevant regulatory compliance requirements
- Customized terms based on parties' likely needs

CONTEXTUAL PROVISIONS:
- Risk assessment based on business type mentioned
- Appropriate security and guarantee structures
- Regulatory compliance for the specific sector
- Termination and default provisions suited to the context

LEGAL FRAMEWORK:
- Indian Contract Act 1872 compliance
- RBI guidelines adherence where applicable
- Companies Act 2013 provisions for corporate parties
- Stamp Act and Registration Act compliance
"""

def generate_pdf(ai_content: str, doc_type: str) -> io.BytesIO:
    """Render the AI-generated text as an A4 PDF using reportlab.

    The PDF starts with ``doc_type`` in upper case as the title and a
    "Generated on <date>" line. The content is then split on blank lines;
    each non-empty chunk becomes one paragraph, and line breaks inside a
    chunk are preserved.

    Args:
        ai_content: Plain text to render. ``None`` or empty text produces a
            PDF with only the title and date.
        doc_type: Human-readable document type used as the title.

    Returns:
        An in-memory buffer holding the PDF, positioned at offset 0.
    """
    # stdlib helper; imported locally so this function carries its own dependency
    from xml.sax.saxutils import escape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()
    # The stylesheet's 'Normal' style is adjusted in place, so every use of
    # it in this function gets the same font settings.
    style = styles['Normal']
    style.fontName = 'Times-Roman'
    style.fontSize = 12
    style.leading = 14

    elements = [
        Paragraph(escape(doc_type.upper()), styles['Title']),
        Spacer(1, 12),
        Paragraph(f"Generated on {datetime.now().strftime('%B %d, %Y')}", style),
        Spacer(1, 12),
    ]

    for para in (ai_content or '').split('\n\n'):
        para = para.strip()
        if not para:
            # Runs of blank lines would otherwise add empty paragraphs and
            # extra vertical gaps.
            continue
        # Paragraph parses its text as XML-like markup, so literal '&', '<'
        # and '>' in the content must be escaped BEFORE the <br/> tags are
        # inserted; otherwise text such as "A & B" or "<name>" is read as
        # markup and can abort the build.
        markup = escape(para).replace('\n', '<br/>')
        elements.append(Paragraph(markup, style))
        elements.append(Spacer(1, 12))

    doc.build(elements)
    buffer.seek(0)
    return buffer

def main():
    """Run the interactive comparison: template draft, AI draft, then a PDF.

    Reads one request from standard input. An empty request shows an error
    and stops. Otherwise the template-based and AI-based drafts are shown,
    the AI draft is rendered to a PDF named after the document type, and a
    browser download of that file is triggered. Any failure in the PDF step
    is reported as a message rather than raised.
    """
    def show(markdown_text):
        # Render a Markdown string in the notebook output.
        display(Markdown(markdown_text))

    show("# Legal Drafting Comparison")
    show("Enter a legal document requirement (e.g., 'loan agreement for Rs. 50 lakhs for 3 years in Mumbai')")

    user_input = input("Your input: ")
    if not user_input.strip():
        show("**Error**: Please enter a legal document requirement")
        return

    # Template-based draft
    show("## Template-Based Document")
    template_result = generate_template_document(user_input)
    show(f"```\n{template_result}\n```")

    # AI-powered draft
    show("## AI-Powered Document")
    ai_result = generate_ai_document(user_input)
    show(f"```\n{ai_result}\n```")

    # PDF of the AI draft, titled with the detected document type
    show("## Generating PDF")
    detected_type = extract_parameters(user_input)['document_type']
    doc_type = detected_type.replace('_', ' ').title()

    try:
        pdf_buffer = generate_pdf(ai_result, doc_type)
        pdf_filename = doc_type.lower().replace(' ', '_') + ".pdf"
        with open(pdf_filename, "wb") as pdf_file:
            pdf_file.write(pdf_buffer.getvalue())
        files.download(pdf_filename)
        show(f"PDF generated and downloaded as `{pdf_filename}`")
    except Exception as err:
        show(f"**PDF generation failed**: {str(err)}")


if __name__ == "__main__":
    main()

In [None]:
!pip install groq latexmk
!apt-get install texlive-full

In [None]:
import re
from datetime import datetime
import io
import latexmk
from IPython.display import display, Markdown
from google.colab import files
from groq import Groq

# Template definitions
# Plain-text document templates keyed by document type. A template is filled
# with str.format(); the {date}, {location}, {amount} and {duration} fields are
# the keys produced by extract_parameters(). Square-bracket items such as
# [LENDER NAME] are literal text: nothing in this cell substitutes them.
templates = {
    'loan_agreement': """
BUSINESS LOAN AGREEMENT

This Business Loan Agreement is executed on {date} at {location}, India.

BETWEEN:
1. [LENDER NAME], a company incorporated under the Companies Act, 2013 (the "LENDER")
2. [BORROWER NAME], a company incorporated under the Companies Act, 2013 (the "BORROWER")

1. LOAN AMOUNT: Rs. {amount}
2. INTEREST RATE: [INTEREST RATE]% per annum
3. REPAYMENT PERIOD: {duration}
4. SECURITY: [SECURITY DETAILS]

5. DEFAULT PROVISIONS:
Standard default provisions apply. In case of default, entire amount becomes due.

6. GOVERNING LAW:
This Agreement shall be governed by laws of India. Disputes subject to {location} jurisdiction.

LENDER: _________________    BORROWER: _________________
""",
    'service_agreement': """
SERVICE AGREEMENT

This Service Agreement is executed on {date} at {location}, India.

BETWEEN:
1. [SERVICE PROVIDER] (the "Provider")
2. [CLIENT] (the "Client")

1. SERVICES: [SERVICE DESCRIPTION]
2. PAYMENT: Rs. {amount}
3. DURATION: {duration}
4. DELIVERABLES: [TO BE SPECIFIED]

5. STANDARD TERMS:
- Confidentiality provisions apply
- Standard termination clauses
- Governing law: India

PROVIDER: _________________    CLIENT: _________________
"""
}

def extract_parameters(user_input: str) -> dict:
    """Extract document parameters from a free-text request.

    Matching is case-insensitive and purely pattern based.

    Args:
        user_input: The user's description of the document they need.

    Returns:
        A dict with the keys:
        - 'document_type': 'service_agreement' if the text mentions
          "service" or "consulting", otherwise 'loan_agreement'.
        - 'amount': the number following a currency marker (rs, rs.,
          rupee(s), inr, ₹), including a trailing unit such as "lakhs" or
          "crore" when present; '[AMOUNT]' if none is found.
        - 'duration': e.g. "3 years" or "1 month"; '[DURATION]' if none
          is found.
        - 'location': the first known city found in the text, title-cased;
          'Mumbai' if none is found.
        - 'date': today's date formatted like "January 05, 2025".
    """
    params = {
        'document_type': 'loan_agreement',  # default
        'amount': '[AMOUNT]',
        'duration': '[DURATION]',
        'location': 'Mumbai',
        'date': datetime.now().strftime("%B %d, %Y")
    }

    input_lower = user_input.lower()

    # Document type detection
    if any(word in input_lower for word in ('service', 'consulting')):
        params['document_type'] = 'service_agreement'

    # Amount extraction.
    # - \b keeps the "rs" inside words such as "years" from being read as a
    #   currency marker ("2 years 6 months" must not yield amount "6").
    # - The number must start and end with a digit, so a trailing comma
    #   ("50,000, payable") is not captured.
    # - The optional unit is kept: dropping "lakhs" from "Rs. 50 lakhs"
    #   would change the amount by five orders of magnitude.
    amount_match = re.search(
        r'(?:\brs\.?|\brupees?|\binr|₹)\s*'
        r'(\d+(?:,\d+)*(?:\.\d+)?)'
        r'(?:\s*(lakhs?|lacs?|crores?|thousand|million)\b)?',
        input_lower,
    )
    if amount_match:
        number, unit = amount_match.groups()
        params['amount'] = f"{number} {unit}" if unit else number

    # Duration extraction. The trailing \b stops "5000 monthly" from being
    # read as a 5000-month duration; "[\s-]*" also accepts "3-year".
    duration_match = re.search(r'(\d+)[\s-]*(year|month)s?\b', input_lower)
    if duration_match:
        count, unit = duration_match.groups()
        plural = '' if int(count) == 1 else 's'
        params['duration'] = f"{count} {unit}{plural}"

    # Location extraction: the first city in this list that appears in the text
    cities = ('mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune', 'hyderabad')
    found_city = next((city for city in cities if city in input_lower), None)
    if found_city is not None:
        params['location'] = found_city.title()

    return params

def generate_template_document(user_input: str) -> str:
    """Fill the template that matches the request with its extracted fields.

    The document type and field values come from extract_parameters(); a
    document type with no entry in ``templates`` uses the loan agreement
    template.
    """
    fields = extract_parameters(user_input)
    default_template = templates['loan_agreement']
    chosen_template = templates.get(fields['document_type'], default_template)
    return chosen_template.format(**fields)

def generate_ai_document(user_input: str) -> str:
    """Draft a document for the request through the Groq chat API.

    The API key is read from ``groq_api_key.txt`` in the working directory.
    If the key cannot be read or the client cannot be created, the canned
    text from fallback_ai_simulation() is returned instead. If the API call
    itself fails, the returned string holds the error message followed by
    that same canned text.
    """
    try:
        with open('groq_api_key.txt', 'r') as key_file:
            stored_key = key_file.read().strip()
        groq_client = Groq(api_key=stored_key)
    except Exception:
        # No usable key/client: silently use the simulated output.
        return fallback_ai_simulation(user_input)

    system_message = {
        "role": "system",
        "content": "You are an expert in Indian commercial law and legal document drafting.",
    }

    try:
        request_text = f"""
You are a legal assistant specializing in Indian commercial law. Based on the following request, generate a professional legal document following Indian legal conventions:

Request: "{user_input}"

Requirements:
1. Use proper Indian legal document structure
2. Include relevant Indian law references
3. Use formal legal language appropriate for Indian courts
4. Include proper execution clauses
5. Make it contextually appropriate for the specific request

Generate a complete, professionally formatted document.
"""
        completion = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[system_message, {"role": "user", "content": request_text}],
            max_tokens=1500,
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as err:
        # The error text is returned as part of the result, not raised.
        return f"API Error: {str(err)}\n\nFalling back to AI simulation:\n\n{fallback_ai_simulation(user_input)}"

def fallback_ai_simulation(user_input: str) -> str:
    """Return fixed sample text that stands in for an AI-generated draft.

    The request is lower-cased and checked for keywords, in this order:
    "startup"/"fintech" selects the startup loan text, "service"/"consulting"
    selects the service agreement text, and anything else gets the generic
    loan text. The returned text does not otherwise depend on the request.
    """
    request = user_input.lower()

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the request.
        return any(keyword in request for keyword in keywords)

    if mentions('startup', 'fintech'):
        return """
BUSINESS LOAN AGREEMENT (AI-GENERATED - STARTUP CONTEXT)

This Business Loan Agreement is executed considering the startup nature of the borrower and associated risks.

BETWEEN:
1. [LENDER] - Professional lending institution
2. [BORROWER] - Startup company with innovative business model

WHEREAS the Borrower operates in a high-growth, technology-driven sector with inherent risks and opportunities;

1. LOAN TERMS:
   - Amount: As specified in request
   - Interest: Market rate + startup risk premium
   - Tenure: Flexible repayment aligned with business cash flows

2. STARTUP-SPECIFIC PROVISIONS:
   - Milestone-based disbursements
   - Equity conversion options if applicable
   - Relaxed collateral requirements considering IP assets

3. ENHANCED RISK PROVISIONS:
   - Regular financial reporting requirements
   - Board observer rights for lender
   - First right of refusal on future funding rounds

4. REGULATORY COMPLIANCE:
   - Compliance with RBI guidelines for startup lending
   - DPIIT recognition requirements if applicable
   - Sector-specific regulatory adherence

5. EXIT PROVISIONS:
   - Prepayment options without penalty
   - Conversion to equity upon trigger events
   - Acceleration clauses adapted for startup scenarios

This agreement reflects the unique aspects of startup lending in the Indian ecosystem.
"""

    if mentions('service', 'consulting'):
        return """
PROFESSIONAL SERVICE AGREEMENT (AI-GENERATED)

This Agreement recognizes the professional service nature and includes appropriate IP and deliverable provisions.

PARTIES:
Service Provider and Client as identified in the request

SCOPE OF WORK:
- Detailed service specifications based on professional standards
- Clear deliverable milestones and timelines
- Quality benchmarks and acceptance criteria

INTELLECTUAL PROPERTY:
- Work product ownership clearly defined
- Pre-existing IP protection
- Derivative work rights allocation

PAYMENT AND BILLING:
- Professional fee structure
- Milestone-based payments
- Expense reimbursement terms

PROFESSIONAL STANDARDS:
- Industry best practices compliance
- Confidentiality and non-disclosure
- Professional liability limitations

TERMINATION AND TRANSITION:
- Professional handover requirements
- Knowledge transfer obligations
- Post-termination restrictions
"""

    return """
BUSINESS LOAN AGREEMENT (AI-GENERATED)

This Agreement has been contextually generated based on the specific requirements mentioned.

KEY AI ENHANCEMENTS:
- Analyzed input for business context and risk factors
- Selected appropriate legal framework for the transaction
- Included relevant regulatory compliance requirements
- Customized terms based on parties' likely needs

CONTEXTUAL PROVISIONS:
- Risk assessment based on business type mentioned
- Appropriate security and guarantee structures
- Regulatory compliance for the specific sector
- Termination and default provisions suited to the context

LEGAL FRAMEWORK:
- Indian Contract Act 1872 compliance
- RBI guidelines adherence where applicable
- Companies Act 2013 provisions for corporate parties
- Stamp Act and Registration Act compliance
"""

def generate_latex_document(ai_content: str, doc_type: str) -> str:
    """Build a complete LaTeX source document around the AI-generated text.

    The text is split on blank lines into paragraphs. LaTeX special
    characters are escaped, and line breaks inside a paragraph are kept
    with ``\\newline`` (LaTeX would otherwise merge the lines of a list or
    clause block into one run of text).

    Args:
        ai_content: Plain text to place in the document body. ``None`` or
            empty text yields a document with only the heading.
        doc_type: Human-readable document type; shown upper-cased as the
            centred heading.

    Returns:
        The LaTeX source as a string, from ``\\documentclass`` through
        ``\\end{document}``.
    """
    # Escaping is done in ONE pass with str.translate. Chained .replace()
    # calls re-escape their own output: the braces of an inserted
    # "\textbackslash{}" would be turned into "\textbackslash\{\}".
    latex_escapes = str.maketrans({
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })

    paragraphs = []
    for chunk in (ai_content or '').split('\n\n'):
        lines = [line.strip().translate(latex_escapes) for line in chunk.split('\n')]
        lines = [line for line in lines if line]
        if lines:
            # \newline (not \\) so a following line that starts with "[" or
            # "*" is not parsed as an optional argument of the line break.
            paragraphs.append(' \\newline\n'.join(lines))
    body = '\n\n'.join(paragraphs)

    title = doc_type.upper().translate(latex_escapes)

    # Basic LaTeX structure
    latex_content = r"""
\documentclass[a4paper,12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{geometry}
\geometry{margin=1in}
\usepackage{parskip}
\usepackage{enumitem}
\usepackage{titlesec}
\titleformat{\section}{\large\bfseries}{\thesection.}{0.5em}{}
\titleformat{\subsection}{\normalsize\bfseries}{\thesubsection}{0.5em}{}
\usepackage{times}

\begin{document}

\begin{center}
    \textbf{\Large """ + title + r"""}

    \vspace{0.5cm}
    Generated on """ + datetime.now().strftime("%B %d, %Y") + r"""
\end{center}

\vspace{0.5cm}

""" + body + r"""

\end{document}
"""
    return latex_content

def main():
    """Run the interactive comparison: template draft, AI draft, then a PDF.

    Reads one request from standard input. An empty request shows an error
    and stops. Otherwise the template-based and AI-based drafts are shown,
    the AI draft is wrapped in LaTeX source, compiled to a PDF named after
    the document type, and a browser download of that file is triggered.
    Any failure in the compile/save/download step is reported as a message
    rather than raised.
    """
    def show(markdown_text):
        # Render a Markdown string in the notebook output.
        display(Markdown(markdown_text))

    show("# Legal Drafting Comparison")
    show("Enter a legal document requirement (e.g., 'loan agreement for Rs. 50 lakhs for 3 years in Mumbai')")

    user_input = input("Your input: ")
    if not user_input.strip():
        show("**Error**: Please enter a legal document requirement")
        return

    # Template-based draft
    show("## Template-Based Document")
    template_result = generate_template_document(user_input)
    show(f"```\n{template_result}\n```")

    # AI-powered draft
    show("## AI-Powered Document")
    ai_result = generate_ai_document(user_input)
    show(f"```\n{ai_result}\n```")

    # PDF of the AI draft, titled with the detected document type
    show("## Generating PDF")
    detected_type = extract_parameters(user_input)['document_type']
    doc_type = detected_type.replace('_', ' ').title()
    latex_content = generate_latex_document(ai_result, doc_type)
    pdf_filename = doc_type.lower().replace(' ', '_') + ".pdf"

    try:
        pdf_buffer = io.BytesIO()
        # NOTE(review): confirm that the installed `latexmk` module really
        # provides compile(source, output_format=..., output_buffer=...).
        # If it does not, this call raises and every run ends in the
        # "PDF generation failed" message below.
        latexmk.compile(latex_content, output_format='pdf', output_buffer=pdf_buffer)
        pdf_buffer.seek(0)
        with open(pdf_filename, "wb") as pdf_file:
            pdf_file.write(pdf_buffer.getvalue())
        files.download(pdf_filename)
        show(f"PDF generated and downloaded as `{pdf_filename}`")
    except Exception as err:
        show(f"**PDF generation failed**: {str(err)}")


if __name__ == "__main__":
    main()

# Document Similarity and Classification

1. Explain TF-IDF vectorization concept: "Converting legal text to numbers"
2. Show cosine similarity: "How we measure document similarity mathematically"
3. Demonstrate classification: "How AI learns to categorize legal documents"

In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import pandas as pd

# Sample Indian legal documents for demonstration
# Eight short texts, two per category, listed in category order. The entry at
# position i is labelled by document_labels[i] below, so the two lists must
# stay the same length and in the same order.
sample_documents = [
    # Sale Agreements
    "This Sale Agreement is executed on this day between the Vendor Rajesh Kumar, son of Late Shri Ram Kumar, aged 45 years, residing at Mumbai, Maharashtra (hereinafter called the VENDOR) and the Purchaser Priya Sharma, daughter of Shri Mohan Sharma, aged 35 years, residing at Delhi (hereinafter called the PURCHASER). The Vendor agrees to sell the property situated at Plot No. 123, Sector 15, Gurgaon, Haryana for a consideration of Rs. 50,00,000 (Fifty Lakh Rupees only). The sale is subject to clear title and all statutory approvals.",

    "Sale Deed executed between Seller Amit Patel, son of Shri Kishore Patel, aged 40 years, resident of Ahmedabad, Gujarat and Buyer Sneha Joshi, daughter of Shri Prakash Joshi, aged 30 years, resident of Pune, Maharashtra. The property being sold is located at Survey No. 45/2, Village Baner, Pune for total consideration of Rs. 75,00,000. All documents related to the property are clear and marketable title is guaranteed.",

    # Rental Agreements
    "This Rental Agreement is made between the Landlord Suresh Gupta, aged 50 years, residing at Flat No. 201, Green Park, New Delhi and the Tenant Kavita Singh, aged 28 years, working as Software Engineer. The premises located at Flat No. 105, Tower A, DLF Phase 2, Gurgaon is rented for monthly rent of Rs. 25,000. The tenancy period is 11 months commencing from 1st April 2024. Security deposit of Rs. 50,000 is paid in advance.",

    "Lease Agreement between Lessor Ramesh Iyer, son of Late K. Iyer, aged 55 years, residing at Chennai, Tamil Nadu and Lessee Deepak Mehta, aged 32 years, employed with TCS Limited. The leased premises is Shop No. 15, Commercial Complex, Anna Nagar, Chennai for monthly rental of Rs. 40,000. Lease period is 36 months from 1st January 2024 with option to renew.",

    # Employment Contracts
    "Employment Agreement between ABC Technologies Private Limited, a company incorporated under Companies Act 2013, having its registered office at Bangalore, Karnataka (hereinafter called COMPANY) and Mr. Rohit Sharma, son of Shri Ved Sharma, aged 29 years, residing at Hyderabad (hereinafter called EMPLOYEE). The Employee is appointed as Senior Software Developer with annual CTC of Rs. 12,00,000. The employment is subject to 3 months probation period and 2 months notice period for termination.",

    "Service Agreement executed between XYZ Consultants LLP, a Limited Liability Partnership firm registered in Mumbai, Maharashtra and Ms. Anjali Verma, daughter of Shri Raj Verma, aged 26 years. The position offered is Business Analyst with gross salary of Rs. 8,50,000 per annum. Employment terms include medical insurance, provident fund contributions as per statutory requirements.",

    # Court Judgments
    "In the matter of Civil Appeal No. 1234 of 2023, the Hon'ble Supreme Court of India comprising of Justice A.K. Sharma and Justice B.R. Patel heard the appeal filed by the Appellant against the judgment of Delhi High Court. The case relates to property dispute between two brothers regarding ancestral property located in Rohini, Delhi. The Court held that partition of ancestral property must be done as per Hindu Succession Act provisions and both parties have equal rights.",

    "Criminal Appeal No. 567/2023 before the High Court of Karnataka at Bangalore. The Appellant challenged the conviction under Section 420 of Indian Penal Code by the Sessions Court. The case involved financial fraud of Rs. 5,00,000 through fake documents. The High Court upheld the conviction but reduced the sentence from 3 years to 2 years imprisonment considering the appellant's clean antecedents and partial recovery of the amount."
]

# Document labels for classification
# One label per entry of sample_documents, in the same order.
document_labels = [
    'Sale Agreement', 'Sale Agreement',
    'Rental Agreement', 'Rental Agreement',
    'Employment Contract', 'Employment Contract',
    'Court Judgment', 'Court Judgment'
]

class LegalDocumentAnalyzer:
    """TF-IDF based similarity search, classification and key-term extraction.

    Two independent TF-IDF models are held:
    - ``self.vectorizer`` (unigrams + bigrams) is refitted on whatever
      documents are passed to find_similar_documents() or
      extract_key_terms().
    - ``self.classifier`` is a pipeline of its own TF-IDF step followed by a
      multinomial naive Bayes model; it is trained by fit().
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer(
            max_features=1000,
            stop_words='english',
            ngram_range=(1, 2),  # Include bigrams for better legal context
            min_df=1
        )
        self.classifier = Pipeline([
            ('tfidf', TfidfVectorizer(max_features=500, stop_words='english')),
            ('classifier', MultinomialNB())
        ])

    def fit(self, documents, labels):
        """Train the document classifier on parallel lists of texts and labels."""
        self.classifier.fit(documents, labels)

    def find_similar_documents(self, documents, query_doc_index, top_k=3):
        """Find the documents most similar to one document of the collection.

        Args:
            documents: Sequence of document texts; the vectorizer is
                refitted on it.
            query_doc_index: Position in ``documents`` of the query document.
                Negative positions count from the end.
            top_k: Maximum number of matches to return.

        Returns:
            Up to ``top_k`` dicts, most similar first, each with
            'document_index', 'similarity_score' and 'document_preview'
            (first 100 characters followed by "..."). The query document
            itself is never included.

        Raises:
            IndexError: If ``query_doc_index`` is outside ``documents``.
        """
        # Normalise the position (and reject out-of-range values) so the
        # self-exclusion test below also works for negative indices.
        query_pos = range(len(documents))[query_doc_index]

        # Vectorize all documents
        tfidf_matrix = self.vectorizer.fit_transform(documents)

        # Similarity of the query document to every document
        sim_scores = cosine_similarity(tfidf_matrix)[query_pos]

        # Rank from most to least similar, then drop the query by INDEX.
        # Dropping "the first ranked entry" is not equivalent: an exact
        # duplicate (or a floating-point tie) can outrank the query, which
        # would return the query itself and discard a genuine match.
        ranked = np.argsort(sim_scores, kind='stable')[::-1]
        similar_indices = [int(i) for i in ranked if i != query_pos][:max(top_k, 0)]

        return [
            {
                'document_index': idx,
                'similarity_score': float(sim_scores[idx]),
                'document_preview': documents[idx][:100] + "..."
            }
            for idx in similar_indices
        ]

    def classify_document(self, document):
        """Classify one document with the trained classifier.

        fit() must have been called first.

        Returns:
            A tuple ``(prediction, class_probs)`` where ``prediction`` is the
            predicted label and ``class_probs`` maps every known label to
            its predicted probability.
        """
        prediction = self.classifier.predict([document])[0]
        probabilities = self.classifier.predict_proba([document])[0]

        # Pair each class name with its probability
        class_probs = dict(zip(self.classifier.classes_, probabilities))

        return prediction, class_probs

    def extract_key_terms(self, documents, top_n=10):
        """Return the ``top_n`` terms with the highest mean TF-IDF score.

        The vectorizer is refitted on ``documents``. The result is a list of
        ``(term, mean_score)`` tuples, highest score first.
        """
        tfidf_matrix = self.vectorizer.fit_transform(documents)
        feature_names = self.vectorizer.get_feature_names_out()

        # Mean TF-IDF score per term across all documents, computed on the
        # sparse matrix directly instead of converting it to a dense array.
        mean_scores = np.asarray(tfidf_matrix.mean(axis=0)).ravel()

        # Get top terms
        top_indices = np.argsort(mean_scores)[::-1][:top_n]
        return [(feature_names[i], mean_scores[i]) for i in top_indices]

# Demonstration
def run_demo():
    """Print a three-part walkthrough on the sample documents.

    Trains a LegalDocumentAnalyzer on ``sample_documents`` /
    ``document_labels``, then prints (1) the documents most similar to the
    first sample, (2) the predicted category and class probabilities for a
    new employment text, and (3) the top TF-IDF terms of the samples.
    """
    def heading(title):
        # Section title followed by a 40-character rule.
        print(title)
        print("-" * 40)

    print("=== Legal Document Similarity and Classification Demo ===\n")

    analyzer = LegalDocumentAnalyzer()

    # Train classifier
    analyzer.fit(sample_documents, document_labels)
    print("✓ Document classifier trained on sample legal documents\n")

    # Part 1: similarity to the first sale agreement
    heading("1. DOCUMENT SIMILARITY ANALYSIS")
    query_index = 0
    query_preview = sample_documents[query_index][:150]
    print("Query Document (Sale Agreement):")
    print(f"'{query_preview}...'\n")

    matches = analyzer.find_similar_documents(sample_documents, query_index)

    print("Most Similar Documents:")
    for rank, match in enumerate(matches, 1):
        matched_label = document_labels[match['document_index']]
        print(f"{rank}. Similarity Score: {match['similarity_score']:.3f}")
        print(f"   Document Type: {matched_label}")
        print(f"   Preview: {match['document_preview']}\n")

    # Part 2: classification of a document not in the training set
    heading("2. DOCUMENT CLASSIFICATION")

    new_document = """This Employment Agreement is entered into between Tech Solutions India Pvt Ltd,
    a company registered in Pune, Maharashtra and Mr. Vikash Kumar, aged 31 years.
    The employee is hired as Data Scientist with annual package of Rs. 15,00,000.
    The terms include 6 months probation and standard benefits as per company policy."""

    print("New Document to Classify:")
    print(f"'{new_document}'\n")

    prediction, probabilities = analyzer.classify_document(new_document)
    print(f"Predicted Category: {prediction}")
    print("Classification Probabilities:")
    by_probability = sorted(probabilities.items(), key=lambda item: item[1], reverse=True)
    for category, prob in by_probability:
        print(f"  {category}: {prob:.3f}")

    # Part 3: highest-scoring terms across the samples
    heading("\n3. KEY LEGAL TERMS ANALYSIS")
    print("Most Important Terms in Legal Documents:")
    for position, (term, score) in enumerate(analyzer.extract_key_terms(sample_documents), 1):
        print(f"{position:2d}. {term:<20} (TF-IDF: {score:.3f})")


if __name__ == "__main__":
    run_demo()

# Legal Database Search

1. Show how keyword search misses relevant documents
2. Demonstrate semantic search finding conceptually related cases
3. Explain query expansion with legal terminology

In [None]:
import requests
from bs4 import BeautifulSoup
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from urllib.parse import quote_plus
import time

class EnhancedLegalSearch:
    """
    Demonstrates AI-enhanced search capabilities for legal databases
    Compares traditional keyword search vs semantic search

    The "database" is a small in-memory list of sample cases. Three search
    strategies are offered over it:

    * ``traditional_keyword_search`` - substring counting over title, content
      and keywords.
    * ``semantic_search`` - TF-IDF vectors of the case contents ranked by
      cosine similarity to the query.
    * ``enhanced_search_with_suggestions`` - the TF-IDF search run on a query
      that has first been expanded with related legal terms.
    """

    # Query-expansion table: trigger term -> related terms appended to the query
    # when the trigger occurs (as a substring) in the lower-cased query.
    LEGAL_EXPANSIONS = {
        'property': ['property', 'land', 'real estate', 'immovable property'],
        'divorce': ['divorce', 'matrimonial', 'marriage dissolution', 'marital dispute'],
        'tax': ['tax', 'taxation', 'income tax', 'revenue', 'assessment'],
        'fraud': ['fraud', 'cheating', 'deception', 'misrepresentation', '420 IPC'],
        'contract': ['contract', 'agreement', 'covenant', 'understanding'],
        'inheritance': ['inheritance', 'succession', 'ancestral property', 'Hindu Succession Act']
    }

    def __init__(self):
        """Build the sample case list and fit the TF-IDF vectorizer on the case contents."""
        self.vectorizer = TfidfVectorizer(
            max_features=5000,
            stop_words='english',
            ngram_range=(1, 3),
            min_df=1,
            max_df=0.95
        )

        # Sample Indian legal database (simulating Indian Kanoon-like data)
        self.sample_cases = [
            {
                'title': 'Rajesh Kumar vs State of Maharashtra - Property Dispute',
                'content': 'The Hon\'ble High Court of Bombay in this case dealt with the issue of property rights under the Hindu Succession Act. The petitioner claimed ancestral property rights over agricultural land in Pune district. The court held that agricultural land acquired by father through his own efforts cannot be claimed as ancestral property by sons. The judgment clarifies the distinction between ancestral and self-acquired property under Hindu law.',
                'citation': '2023 BHC 1234',
                'court': 'Bombay High Court',
                'year': 2023,
                'keywords': ['property rights', 'Hindu Succession Act', 'ancestral property', 'agricultural land']
            },
            {
                'title': 'ABC Technologies Ltd vs Income Tax Commissioner - Tax Assessment',
                'content': 'The Supreme Court in this landmark judgment addressed the issue of software development expenses and their treatment under Income Tax Act. The company claimed that software development costs should be treated as revenue expenditure rather than capital expenditure. The court ruled in favor of the taxpayer, holding that software development in IT companies is part of regular business operations and qualifies for revenue treatment.',
                'citation': '2023 SC 5678',
                'court': 'Supreme Court of India',
                'year': 2023,
                'keywords': ['income tax', 'software development', 'revenue expenditure', 'capital expenditure']
            },
            {
                'title': 'Priya Sharma vs Deepak Sharma - Matrimonial Dispute',
                'content': 'Delhi High Court judgment on maintenance under Section 125 CrPC and domestic violence under Protection of Women from Domestic Violence Act 2005. The wife sought maintenance and protection from mental cruelty. The court awarded monthly maintenance of Rs. 30,000 and issued protection orders. The judgment emphasizes that mental cruelty constitutes domestic violence and economic abuse through denial of maintenance is punishable.',
                'citation': '2023 DHC 9012',
                'court': 'Delhi High Court',
                'year': 2023,
                'keywords': ['maintenance', 'domestic violence', 'mental cruelty', 'Section 125 CrPC']
            },
            {
                'title': 'State of Karnataka vs Ramesh Iyer - Criminal Appeal',
                'content': 'Karnataka High Court judgment in criminal appeal under Section 420 IPC (cheating). The accused was convicted for cheating investors through fake investment schemes promising high returns. The court upheld conviction but reduced sentence considering first-time offender status and partial recovery of defrauded amount. The judgment provides guidelines on sentencing in financial fraud cases.',
                'citation': '2023 KHC 3456',
                'court': 'Karnataka High Court',
                'year': 2023,
                'keywords': ['Section 420 IPC', 'cheating', 'financial fraud', 'investment schemes']
            },
            {
                'title': 'XYZ Corporation vs Union of India - Constitutional Challenge',
                'content': 'Supreme Court constitutional bench judgment challenging validity of certain provisions of the Insolvency and Bankruptcy Code 2016. The corporate debtor argued that the IBC provisions violate principles of natural justice and due process. The court upheld the constitutional validity of IBC while providing clarifications on procedural safeguards for corporate debtors during insolvency proceedings.',
                'citation': '2023 SC 7890',
                'court': 'Supreme Court of India',
                'year': 2023,
                'keywords': ['Insolvency and Bankruptcy Code', 'constitutional validity', 'natural justice', 'corporate debtor']
            },
            {
                'title': 'Sunita Devi vs State of UP - Land Acquisition',
                'content': 'Allahabad High Court judgment on land acquisition under Right to Fair Compensation and Transparency in Land Acquisition Act 2013. Farmers challenged land acquisition for industrial project claiming inadequate compensation and lack of proper consultation. The court ordered fresh assessment of compensation and mandated proper rehabilitation package. The judgment reinforces farmers\' rights in land acquisition.',
                'citation': '2023 AHC 2345',
                'court': 'Allahabad High Court',
                'year': 2023,
                'keywords': ['land acquisition', 'fair compensation', 'rehabilitation', 'farmers rights']
            }
        ]

        # Only the 'content' field feeds the TF-IDF vocabulary; titles and
        # keywords are used by the keyword search alone.
        self.case_texts = [case['content'] for case in self.sample_cases]
        self.vectorizer.fit(self.case_texts)

    def traditional_keyword_search(self, query, max_results=3):
        """Simulate traditional keyword-based search

        Each whitespace-separated query word is counted as a case-insensitive
        substring of the case's title, content and keywords; the score is the
        total number of occurrences.

        Args:
            query: Free-text search string.
            max_results: Maximum number of hits to return.

        Returns:
            Up to ``max_results`` dicts with keys ``'case'``, ``'score'`` (int)
            and ``'match_type'`` (``'keyword'``), highest score first. Cases
            with a score of zero are omitted.
        """
        query_words = query.lower().split()
        results = []

        for case in self.sample_cases:
            searchable_text = ' '.join(
                [case['title'], case['content'], ' '.join(case['keywords'])]
            ).lower()

            # str.count already yields 0 for absent words, so no membership test is needed
            score = sum(searchable_text.count(word) for word in query_words)

            if score > 0:
                results.append({
                    'case': case,
                    'score': score,
                    'match_type': 'keyword'
                })

        # Sort by score and return top results
        results.sort(key=lambda x: x['score'], reverse=True)
        return results[:max_results]

    def semantic_search(self, query, max_results=3):
        """AI-enhanced semantic search using TF-IDF and cosine similarity

        Args:
            query: Free-text search string.
            max_results: Maximum number of candidates considered.

        Returns:
            Dicts with keys ``'case'``, ``'score'`` (cosine similarity) and
            ``'match_type'`` (``'semantic'``), most similar first. Candidates
            whose similarity is not above 0.01 are dropped, so fewer than
            ``max_results`` entries (possibly none) may be returned.
        """
        # Vectorize the query
        query_vector = self.vectorizer.transform([query])

        # Vectorize all case documents
        case_vectors = self.vectorizer.transform(self.case_texts)

        # Calculate cosine similarity
        similarities = cosine_similarity(query_vector, case_vectors)[0]

        # Get top results
        top_indices = np.argsort(similarities)[::-1][:max_results]

        results = []
        for idx in top_indices:
            if similarities[idx] > 0.01:  # Minimum relevance threshold
                results.append({
                    'case': self.sample_cases[idx],
                    'score': similarities[idx],
                    'match_type': 'semantic'
                })

        return results

    def enhanced_search_with_suggestions(self, query):
        """Enhanced search with query expansion and suggestions

        Every trigger term of ``LEGAL_EXPANSIONS`` that occurs in the
        lower-cased query contributes its related terms, which are appended to
        the query before it is passed to ``semantic_search``.

        Args:
            query: Free-text search string.

        Returns:
            Whatever ``semantic_search`` returns for the expanded query.
        """
        normalized_query = query.lower()

        # Triggers are matched against the user's own words only. Matching
        # against the growing expanded string would let an appended synonym
        # trigger further, unrelated expansions.
        additions = [
            ' '.join(synonyms)
            for term, synonyms in self.LEGAL_EXPANSIONS.items()
            if term in normalized_query
        ]
        expanded_query = ' '.join([normalized_query, *additions])

        return self.semantic_search(expanded_query)

    @staticmethod
    def _print_results(results, empty_message, score_label, score_spec='',
                       preview_chars=120, detailed=False):
        """Print one ranked result list, or ``empty_message`` when it is empty.

        ``score_spec`` is the format spec applied to each score;
        ``detailed`` additionally prints the court and the case keywords.
        """
        if not results:
            print(empty_message)
            return

        for rank, result in enumerate(results, 1):
            case = result['case']
            print(f"{rank}. {case['title']}")
            print(f"   Citation: {case['citation']}")
            print(f"   {score_label}: {format(result['score'], score_spec)}")
            if detailed:
                print(f"   Court: {case['court']}")
                print(f"   Key Legal Areas: {', '.join(case['keywords'])}")
            print(f"   Preview: {case['content'][:preview_chars]}...\n")

    def compare_search_methods(self, query):
        """Compare traditional vs AI-enhanced search results

        Runs the keyword, TF-IDF and expanded-query searches for ``query`` and
        prints each ranked list (or a "No results" line) to stdout.
        """
        print(f"Search Query: '{query}'\n")
        print("=" * 60)

        # Traditional search
        print("TRADITIONAL KEYWORD SEARCH:")
        print("-" * 30)
        self._print_results(
            self.traditional_keyword_search(query),
            "No results found with keyword search.\n",
            "Keyword Matches",
        )

        # Semantic search
        print("AI-ENHANCED SEMANTIC SEARCH:")
        print("-" * 30)
        self._print_results(
            self.semantic_search(query),
            "No results found with semantic search.\n",
            "Similarity Score",
            score_spec=".3f",
        )

        # Enhanced search with suggestions
        print("ENHANCED SEARCH WITH LEGAL TERM EXPANSION:")
        print("-" * 45)
        self._print_results(
            self.enhanced_search_with_suggestions(query),
            # Report an empty result explicitly, like the two sections above
            "No results found with enhanced search.\n",
            "Enhanced Score",
            score_spec=".3f",
            preview_chars=100,
            detailed=True,
        )

def simulate_indian_kanoon_search(query):
    """
    Describe how an Indian Kanoon-style keyword search would treat ``query``.

    No search is performed: the function echoes the query and prints the
    approach, its limitations and one example, followed by a blank line.
    """
    explanation = (
        f"Simulating Indian Kanoon Search for: '{query}'",
        "Current Approach: Simple keyword matching in titles and content",
        "Limitations: Misses semantic relationships, requires exact keyword matches",
        "Example: Searching 'matrimonial dispute' won't find 'divorce' cases\n",
    )
    print(*explanation, sep="\n")

def demo_search_enhancement():
    """Run the search comparison for a fixed set of queries, then print a summary.

    For each sample query the three search strategies of
    ``EnhancedLegalSearch`` are compared on stdout, with a one-second pause
    between queries. A short description of the pipeline and its benefits is
    printed at the end.
    """
    print("=== Legal Database Search Enhancement Demo ===\n")

    engine = EnhancedLegalSearch()
    rule = "=" * 60

    # Demo queries showing different search scenarios
    sample_queries = (
        "property inheritance dispute",
        "software company tax issues",
        "wife maintenance domestic violence",
        "investment fraud cheating case",
    )

    for sample_query in sample_queries:
        print("🔍 " + rule)
        engine.compare_search_methods(sample_query)
        print(rule + "\n")
        time.sleep(1)  # Pause for readability

    # Closing summary: technical architecture followed by the benefits list
    summary_lines = (
        "TECHNICAL ARCHITECTURE:",
        "-" * 25,
        "1. Document Preprocessing: Tokenization, stop word removal",
        "2. TF-IDF Vectorization: Convert text to numerical vectors",
        "3. Cosine Similarity: Measure semantic similarity between query and documents",
        "4. Query Expansion: Add legal synonyms and related terms",
        "5. Ranking Algorithm: Score and rank results by relevance",
        "\nBenefits over traditional search:",
        "• Finds semantically related content, not just exact keyword matches",
        "• Understands legal terminology and relationships",
        "• Provides relevance scoring for better result ranking",
        "• Can be extended with legal ontologies and case law relationships",
    )
    for summary_line in summary_lines:
        print(summary_line)

# Entry point: run the search demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    demo_search_enhancement()

# Drafting: Time Saving

In [None]:
"""
Comparison: Template-Based vs AI-Powered Legal Drafting
Shows the real value proposition of AI over traditional templates
"""

class TemplateBasedDrafting:
    """Baseline drafter: fixed clause wording with fill-in-the-blank placeholders."""

    def generate_clause(self, clause_type, standard_params):
        """Return the stock wording for ``clause_type`` with placeholders filled in.

        Args:
            clause_type: ``'indemnity'``, ``'termination'`` or ``'jurisdiction'``.
            standard_params: Mapping supplying the placeholder values
                (``notice_period`` for termination, ``location`` for jurisdiction).

        Returns:
            The formatted clause, or an empty string for an unknown clause type.

        Raises:
            KeyError: If the chosen wording needs a placeholder that
                ``standard_params`` does not provide.
        """
        stock_wording = dict(
            indemnity="Each party shall indemnify and hold harmless the other party from and against any claims, damages, losses, costs and expenses arising out of any breach of this Agreement.",
            termination="This Agreement may be terminated by either party upon {notice_period} days written notice.",
            jurisdiction="This Agreement shall be governed by the laws of India and subject to jurisdiction of courts at {location}.",
        )

        try:
            wording = stock_wording[clause_type]
        except KeyError:
            # Unknown clause types yield an empty clause rather than an error
            wording = ''

        return wording.format(**standard_params)

class AIBasedDrafting:
    """Stand-in for an AI drafter: picks clause wording from flags in a context mapping."""

    def generate_clause(self, clause_type, context):
        """Return clause wording chosen by ``clause_type`` and the flags set in ``context``.

        This simulates contextual generation with canned text; no model is called.

        Args:
            clause_type: ``'indemnity'`` and ``'termination'`` have dedicated
                wording; anything else gets the generic placeholder.
            context: Mapping of scenario flags, read with ``.get`` for the two
                dedicated clause types only.

        Returns:
            The wording of the first flag that is truthy in ``context``. For
            indemnity a default clause is used when no flag is set; for
            termination (and any other clause type) a generic placeholder
            string naming the clause type and context is returned instead.
        """
        # Each rule is (context flag, wording); a flag of None marks the
        # unconditional default. Rules are tried in order.
        if clause_type == 'indemnity':
            rules = (
                ('high_risk_business', """The Borrower shall indemnify, defend and hold harmless the Lender, its officers, directors, employees and agents from and against any and all claims, damages, losses, costs and expenses (including reasonable attorneys' fees) arising out of or relating to: (a) any breach of this Agreement by the Borrower; (b) any negligent acts or omissions of the Borrower; (c) any violation of applicable laws by the Borrower; and (d) any third-party claims related to the Borrower's business operations, including but not limited to regulatory compliance issues in the fintech sector."""),
                ('startup_borrower', """The Borrower shall, to the extent of its assets and insurance coverage, indemnify the Lender against direct damages arising from material breaches of this Agreement. Given the early-stage nature of the Borrower's business, this indemnity shall be limited to [X]% of the loan amount and shall not apply to losses arising from general business risks inherent to startup operations."""),
                (None, """Each party shall indemnify and hold harmless the other party from and against any claims, damages, losses, costs and expenses arising out of any breach of this Agreement or negligent acts or omissions of the indemnifying party."""),
            )
        elif clause_type == 'termination':
            rules = (
                ('venture_debt', """This Agreement may be terminated: (a) by mutual consent of the parties; (b) by the Lender immediately upon occurrence of any Event of Default; (c) by the Borrower upon 90 days written notice, subject to payment of prepayment premium as specified in Schedule [X]; (d) automatically upon completion of the Borrower's Series A funding round exceeding USD [amount], provided all outstanding amounts are repaid."""),
                ('working_capital_loan', """Either party may terminate this revolving credit facility upon 30 days written notice. The Borrower may prepay outstanding amounts without penalty. Upon termination, all outstanding principal, accrued interest, and fees shall become immediately due and payable."""),
            )
        else:
            rules = ()

        for flag, wording in rules:
            if flag is None or context.get(flag):
                return wording

        # No dedicated wording matched: fall back to the generic placeholder
        return f"AI-generated {clause_type} clause based on context: {context}"

def demonstrate_difference():
    """Print three scenarios, each showing the template clause next to the AI clause.

    For every scenario the same clause type is requested from
    ``TemplateBasedDrafting`` and ``AIBasedDrafting``; only the parameters or
    context passed to each drafter differ.
    """
    print("=== TEMPLATE vs AI LEGAL DRAFTING COMPARISON ===\n")

    template_drafter = TemplateBasedDrafting()
    ai_drafter = AIBasedDrafting()

    standard_context = {'notice_period': '30', 'location': 'Mumbai'}
    fintech_context = {'high_risk_business': True, 'sector': 'fintech'}
    venture_context = {'venture_debt': True, 'equity_conversion': True}

    # (heading, clause type, template label, template params, AI label, AI context)
    scenarios = (
        # Scenario 1: Standard business loan
        (
            "📋 SCENARIO 1: Standard Business Loan",
            'indemnity',
            "Template Approach:",
            standard_context,
            "\nAI Approach (same context):",
            {},
        ),
        # Scenario 2: High-risk fintech lending
        (
            "🚀 SCENARIO 2: Fintech Startup Loan (High Risk)",
            'indemnity',
            "Template Approach (same template):",
            standard_context,
            "\nAI Approach (context-aware):",
            fintech_context,
        ),
        # Scenario 3: Venture debt with equity features
        (
            "💰 SCENARIO 3: Venture Debt Agreement",
            'termination',
            "Template Approach (doesn't handle complex scenarios):",
            {'notice_period': '30'},
            "\nAI Approach (handles complexity):",
            venture_context,
        ),
    )

    for heading, clause_type, template_label, template_params, ai_label, ai_context in scenarios:
        print(heading)
        print("="*50)

        print(template_label)
        print(template_drafter.generate_clause(clause_type, template_params))
        print(ai_label)
        print(ai_drafter.generate_clause(clause_type, ai_context))
        print("\n" + "="*60 + "\n")

def show_real_ai_capabilities():
    """Print a grouped checklist of capabilities attributed to AI drafting.

    Output is a header, four capability groups with three check-marked
    examples each, and a closing "key insight" banner.
    """
    print("🤖 REAL AI CAPABILITIES IN LEGAL DRAFTING:")
    print("="*50)

    # (group heading, example bullet points), printed in this order
    capability_groups = (
        ("Contextual Understanding", (
            "Understands business type and adjusts language accordingly",
            "Recognizes risk factors and adds appropriate protections",
            "Adapts to different industries (fintech, healthcare, manufacturing)",
        )),
        ("Legal Reasoning", (
            "Knows which laws apply to specific situations",
            "Understands relationships between different clauses",
            "Can balance competing interests (lender vs borrower protection)",
        )),
        ("Dynamic Generation", (
            "Creates new clauses for unique situations",
            "Combines multiple legal concepts intelligently",
            "Adapts language complexity based on document purpose",
        )),
        ("Compliance Awareness", (
            "Considers regulatory requirements automatically",
            "Updates language based on recent legal changes",
            "Flags potential compliance issues",
        )),
    )

    for heading, bullet_points in capability_groups:
        print(f"\n{heading}:")
        print("\n".join(f"  ✓ {point}" for point in bullet_points))

    closing_rule = "=" * 60
    print("\n" + closing_rule)
    print("KEY INSIGHT: Templates are static, AI is dynamic and contextual!")
    print(closing_rule)

def practical_demo_suggestions():
    """Print presenter notes: five numbered ways to demonstrate AI drafting to an audience."""
    print("\n🎯 BETTER DEMO APPROACH FOR YOUR SESSION:")
    print("="*50)

    # (section heading, bullet points); sections are numbered in order when printed
    talking_points = (
        ("SIDE-BY-SIDE COMPARISON", (
            "Show same complex scenario to both approaches",
            "Template: Generic, one-size-fits-all clause",
            "AI: Contextual, situation-specific clause",
        )),
        ("PROGRESSIVE COMPLEXITY", (
            'Start simple: "Standard loan agreement"',
            'Add complexity: "Startup borrower with IP as collateral"',
            'Add more: "Convertible debt with board rights"',
            "Show how templates break down, AI adapts",
        )),
        ("REAL-TIME ADAPTATION", (
            "Ask audience for specific scenario",
            "Show AI generating appropriate clauses live",
            "Demonstrate how AI considers multiple factors simultaneously",
        )),
        ("LEGAL RESEARCH INTEGRATION", (
            "Show AI referencing recent case law",
            "Demonstrate how it adapts to regulatory changes",
            "Show citation of relevant statutes automatically",
        )),
        ("RISK ASSESSMENT", (
            "AI identifies potential issues in draft",
            "Suggests protective clauses based on business type",
            "Templates can't do this contextual risk analysis",
        )),
    )

    sections = []
    for number, (heading, bullets) in enumerate(talking_points, 1):
        section_lines = [f"{number}. {heading}:"]
        section_lines.extend(f"   • {bullet}" for bullet in bullets)
        sections.append("\n".join(section_lines))

    # A blank line precedes the first section, separates sections, and follows the last one
    print("\n" + "\n\n".join(sections) + "\n")

# Entry point: run the drafting comparison demos only when this code is executed directly.
if __name__ == "__main__":
    demonstrate_difference()
    show_real_ai_capabilities()
# The presenter-notes printout is disabled; uncomment the call below (keeping it
# inside the guard above) to print it as well.
#    practical_demo_suggestions()