# MCA Comparison Agent (Multi-Query)

This notebook compares two PPP concession documents using a two-agent framework over multiple queries, then generates a DOCX report.

**Pipeline:**
1. Agent 1 selects relevant section IDs for each query.
2. Agent 2 produces a structured comparison output.
3. Each query output is saved as JSON under `doc_compare/{DOC_A}_&{DOC_B}_compare/`.
4. A DOCX report is generated from those JSON files.

**Prerequisites:**
- `outputs/<DOCUMENT_NAME>/` contains the 12 section JSON files.
- `OPENAI_API_KEY` set in `.env`.


In [14]:
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional

from docx import Document
from docx.shared import Pt
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Load variables from the local .env file (make_llm reads OPENAI_API_KEY from
# the environment). override=True lets values from .env replace variables that
# are already set in the process environment.
load_dotenv(dotenv_path=Path('.env'), override=True)


True

In [25]:
# =========================
# CONFIG
# =========================

# Folder names (under OUTPUTS_DIR) of the two documents being compared.
DOC_A, DOC_B = (
    "Adani  Mundra Port agreement for operations",
    "Bahuli_IOCL_compressed",
)

# Input root: one sub-folder of section JSON files per document.
OUTPUTS_DIR = Path("outputs")

# Output root, and the per-pair folder that receives one JSON file per query.
COMPARE_ROOT = Path("doc_compare")
COMPARE_DIR = COMPARE_ROOT.joinpath(f"{DOC_A}_&{DOC_B}_compare")
COMPARE_DIR.mkdir(parents=True, exist_ok=True)

# Multi-query framework.
# Each entry is one comparison question: `id` orders the outputs and prefixes
# the JSON file name, `title` becomes the report heading, and `query` is the
# text sent to both agents.
# NOTE: adjacent string literals are concatenated verbatim, so the sentence
# ending in "?" carries an explicit trailing space to keep it separated from
# the "Compare both agreements ..." sentence that follows.
QUERIES = [
    {
        "id": 1,
        "title": "Scope of Concession & Rights Granted",
        "query": (
            "Which MCA grants greater commercial autonomy to the Concessionaire through "
            "the scope of concession and rights granted, and how does this affect the "
            "ability to undertake ancillary activities, optimize operations, and "
            "generate additional revenue? "
            "Compare both agreements using explicit rights, implied limitations, and "
            "material silence, supported by clause evidence and evidence strength assessment."
        ),
    },
    {
        "id": 2,
        "title": "Tariff & Revenue Flexibility",
        "query": (
            "Which MCA provides greater flexibility in tariff setting and revenue "
            "generation for the Concessionaire, and how does this impact cash flow "
            "certainty and lender comfort? "
            "Compare both agreements based on explicit pricing clauses, regulatory "
            "controls, implicit constraints, and contractual silence, with clause "
            "evidence and evidence strength assessment."
        ),
    },
    {
        "id": 3,
        "title": "Asset Ownership & Control",
        "query": (
            "Which MCA allocates asset ownership and operational control more favorably "
            "to the Concessionaire, and how does this affect control over operations, "
            "monetization potential, and lender security? "
            "Compare both agreements using ownership clauses, control rights, "
            "restrictions, and clause evidence with evidence strength assessment."
        ),
    },
    {
        "id": 4,
        "title": "Demand & Traffic Risk",
        "query": (
            "Which MCA allocates demand or traffic risk more efficiently between the "
            "Authority and the Concessionaire, and how does this influence revenue "
            "stability, downside risk, and bankability? "
            "Compare both agreements based on explicit risk allocation, guarantees, "
            "exclusivity provisions, and material silence, supported by clause evidence "
            "and evidence strength assessment."
        ),
    },
    {
        "id": 5,
        "title": "Regulatory Micromanagement",
        "query": (
            "Which MCA subjects the Concessionaire to greater regulatory or operational "
            "micromanagement, and how does this affect operational autonomy, compliance "
            "costs, and efficiency? "
            "Compare both agreements using explicit approval requirements, supervisory "
            "powers, implied control mechanisms, and clause evidence with evidence strength assessment."
        ),
    },
    {
        "id": 6,
        "title": "Dispute Resolution",
        "query": (
            "Which MCA provides a more neutral, predictable, and enforceable dispute "
            "resolution framework, and how does this affect legal certainty and lender "
            "confidence? "
            "Compare both agreements based on arbitration structure, appointment "
            "mechanisms, governing law, enforcement certainty, and clause evidence with evidence strength assessment."
        ),
    },
    {
        "id": 7,
        "title": "Termination",
        "query": (
            "Which MCA exposes the Concessionaire to higher termination risk, and how "
            "does this affect equity protection, cash flow continuity, and lender "
            "confidence? "
            "Compare both agreements using termination triggers, cure periods, "
            "compensation provisions, and clause evidence with evidence strength assessment."
        ),
    },
    {
        "id": 8,
        "title": "Exit & End-of-Term Provisions",
        "query": (
            "Which MCA provides greater flexibility and certainty for exit and "
            "end-of-term outcomes for the Concessionaire, and how does this affect "
            "recoverability of investment and refinancing options? "
            "Compare both agreements based on exit rights, transfer provisions, "
            "handover obligations, residual value treatment, and clause evidence with evidence strength assessment."
        ),
    },
]


In [26]:
# =========================
# HELPERS
# =========================

def load_section_json(
    doc_name: str, outputs_dir: Optional[Path] = None
) -> Dict[int, Dict]:
    """Load every section JSON file of one document, keyed by section ID.

    Args:
        doc_name: Name of the document's sub-folder.
        outputs_dir: Root folder holding one sub-folder per document.
            Defaults to the module-level ``OUTPUTS_DIR``.

    Returns:
        Mapping of ``section_id`` to the parsed JSON payload of that file.
        IDs stored as decimal strings (e.g. ``"7"``) are converted to ``int``
        so they match the integer IDs parsed from Agent 1's reply; otherwise
        the lookup by integer ID would silently find nothing.

    Raises:
        FileNotFoundError: If the document folder does not exist.
        KeyError: If a JSON file has no ``"section_id"`` field.
    """
    root = OUTPUTS_DIR if outputs_dir is None else Path(outputs_dir)
    doc_dir = root / doc_name
    if not doc_dir.exists():
        raise FileNotFoundError(f"Missing outputs folder: {doc_dir}")

    sections: Dict[int, Dict] = {}
    # Sorted so that, if two files share an ID, the winner is deterministic.
    for path in sorted(doc_dir.glob("*.json")):
        payload = json.loads(path.read_text(encoding="utf-8"))
        try:
            section_id = payload["section_id"]
        except KeyError:
            # Same exception type as before, but say which file is at fault.
            raise KeyError(f"'section_id' missing in {path}") from None

        if isinstance(section_id, str) and section_id.strip().isdecimal():
            section_id = int(section_id)
        sections[section_id] = payload
    return sections


def make_llm() -> ChatOpenAI:
    """Build the chat model used by the agent chains.

    Returns:
        A ``ChatOpenAI`` client for ``gpt-4o`` at temperature 0, configured
        with the value of the ``OPENAI_API_KEY`` environment variable.
    """
    openai_key = os.getenv("OPENAI_API_KEY")
    return ChatOpenAI(model="gpt-4o", temperature=0, api_key=openai_key)


## Agent 1 — Section Extractor

Selects the minimum set of section IDs needed to answer each query.


In [27]:
# Prompt for Agent 1 (section selector).
# Placeholder: {user_query}.
# The model is told to answer with nothing but a comma-separated list of
# section IDs chosen from the 12 numbered section names listed in the prompt.
AGENT1_PROMPT = ChatPromptTemplate.from_template('''
ROLE:
You are Agent 1 in a two-agent comparison pipeline.

TASK:
Given the user query, choose the minimum necessary sections from the 12-section PPP framework.
Return ONLY the section IDs as a comma-separated list (e.g., "6,7,8").

USER QUERY:
{user_query}

12 SECTION NAMES:
1. Context & Objective
2. Scope of Concession & Rights Granted
3. Asset Ownership & Control
4. Regulatory & Operational Compliance
5. Concession Period & Extension
6. Tariff & Revenue Flexibility
7. Demand & Traffic Risk Allocation
8. Change in Law & Policy Risk
9. Relief Structure
10. Termination & Step-in Rights
11. Dispute Resolution & Governing Law
12. Assignment & Financing Flexibility
''')


## Agent 2 — Comparison Analyst

Produces structured output for each query.


In [28]:
# Prompt for Agent 2 (comparison analyst).
# Placeholders: {user_query}, {doc_a_name}, {doc_b_name}, {doc_a_sections},
# {doc_b_sections}.
# The model is asked for a fixed five-part answer: Finding, Comparison Insight,
# Impact on Cash Flows, Clause Evidence (one bullet per document) and
# Evidence Strength (HIGH / MEDIUM / LOW), based only on the supplied analyses.
AGENT2_PROMPT = ChatPromptTemplate.from_template('''
ROLE:
You are Agent 2 in a two-agent comparison pipeline.

TASK:
Use the extracted section analyses from both documents to answer the user query.
Compare the two documents and produce a structured output exactly in this format:

Finding:
...

Comparison Insight:
...

Impact on Cash Flows:
...

Clause Evidence:
- Document A: <verbatim excerpts with clause/article numbers>
- Document B: <verbatim excerpts with clause/article numbers>

Evidence Strength: <HIGH | MEDIUM | LOW>

RULES:
- Base findings ONLY on the provided section analyses.
- Use clause evidence from those analyses.
- Do not speculate beyond the text.
- If evidence is missing, say so explicitly.

USER QUERY:
{user_query}

DOCUMENT A NAME:
{doc_a_name}

DOCUMENT B NAME:
{doc_b_name}

DOCUMENT A SECTION ANALYSES:
{doc_a_sections}

DOCUMENT B SECTION ANALYSES:
{doc_b_sections}
''')


In [29]:
# =========================
# RUN ALL QUERIES
# =========================

def _parse_section_ids(raw_reply: str, max_section: int = 12) -> List[int]:
    """Extract unique section IDs (1..max_section) from Agent 1's reply, in order.

    Every run of digits is picked up, so a reply such as ``"Sections: 6, 7."``
    still yields ``[6, 7]`` instead of losing IDs that carry extra characters.
    """
    found = (int(token) for token in re.findall(r"\d+", raw_reply))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(sid for sid in found if 1 <= sid <= max_section))


def _join_analyses(sections: List[Dict]) -> str:
    """Concatenate section analyses, separated by a blank line."""
    return "\n\n".join(section["analysis"] for section in sections)


llm = make_llm()
agent1_chain = AGENT1_PROMPT | llm | StrOutputParser()
agent2_chain = AGENT2_PROMPT | llm | StrOutputParser()

doc_a_sections = load_section_json(DOC_A)
doc_b_sections = load_section_json(DOC_B)

results = []

for q in QUERIES:
    # Agent 1: pick the sections relevant to this query.
    section_ids_raw = agent1_chain.invoke({"user_query": q["query"]})
    section_ids = _parse_section_ids(section_ids_raw)
    if not section_ids:
        print(
            f"Warning: query {q['id']}: no section IDs parsed from "
            f"Agent 1 reply {section_ids_raw!r}"
        )

    selected_a = [doc_a_sections[sid] for sid in section_ids if sid in doc_a_sections]
    selected_b = [doc_b_sections[sid] for sid in section_ids if sid in doc_b_sections]
    # Agent 2 is still called (its rules tell it to report missing evidence),
    # but an empty selection should not pass unnoticed.
    if not selected_a or not selected_b:
        print(
            f"Warning: query {q['id']}: no section analyses found for "
            f"{'document A' if not selected_a else 'document B'} "
            f"(requested IDs: {section_ids})"
        )

    payload_a = _join_analyses(selected_a)
    payload_b = _join_analyses(selected_b)

    # Agent 2: produce the structured comparison.
    final_answer = agent2_chain.invoke({
        "user_query": q["query"],
        "doc_a_name": DOC_A,
        "doc_b_name": DOC_B,
        "doc_a_sections": payload_a,
        "doc_b_sections": payload_b,
    })

    out_json = {
        "query_id": q["id"],
        "title": q["title"],
        "query": q["query"],
        "section_ids": section_ids,
        "analysis": final_answer
    }

    # Zero-padded ID prefix keeps the files in query order when sorted by name.
    file_name = f"{q['id']:02d}_{q['title'].replace(' ', '_')}.json"
    out_path = COMPARE_DIR / file_name
    out_path.write_text(json.dumps(out_json, indent=2, ensure_ascii=False), encoding="utf-8")

    results.append(out_json)

print(f"Saved {len(results)} query outputs to: {COMPARE_DIR}")


Saved 8 query outputs to: doc_compare/Adani  Mundra Port agreement for operations_&Bahuli_IOCL_compressed_compare


## Generate DOCX from Query JSONs


In [34]:
from pathlib import Path
import json
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH


# Section labels Agent 2 is asked to emit (see the output format in
# AGENT2_PROMPT), stored case-folded for case-insensitive matching.
_REPORT_SECTION_LABELS = frozenset(
    label.casefold()
    for label in (
        "Finding",
        "Comparison Insight",
        "Impact on Cash Flows",
        "Clause Evidence",
        "Evidence Strength",
    )
)


def _split_section_label(line: str):
    """Split ``"Label: rest"`` when *Label* is a known report section.

    Returns ``(label, rest)`` with surrounding Markdown ``*``/``#`` decoration
    removed, or ``None`` when the line does not open a known section.
    """
    head, colon, tail = line.partition(":")
    if not colon:
        return None
    label = head.strip("*# \t")
    if label.casefold() not in _REPORT_SECTION_LABELS:
        return None
    return label, tail.strip("* \t")


def generate_comparison_docx(
    compare_dir: Path,
    output_path: Path,
    doc_a: str,
    doc_b: str,
):
    """
    Generate a structured DOCX comparison report from JSON outputs.

    Args:
        compare_dir: Folder containing one ``*.json`` file per query; each
            file must provide ``query_id``, ``title``, ``query`` and
            ``analysis``. Files are processed in file-name order.
        output_path: Destination of the ``.docx`` file. Missing parent
            folders are created.
        doc_a: Name of the first document (used in the report title).
        doc_b: Name of the second document (used in the report title).

    Each non-empty line of ``analysis`` is rendered as:
      * a level-2 heading if it is an ALL-CAPS line ending in ``:`` or starts
        with one of the known section labels (text following the label on the
        same line, e.g. ``Evidence Strength: HIGH``, becomes a paragraph);
      * a bullet if it starts with ``"- "``;
      * a normal paragraph otherwise.
    """

    json_files = sorted(compare_dir.glob("*.json"))

    # -------------------------
    # Initialize document
    # -------------------------
    doc = Document()

    normal_style = doc.styles["Normal"]
    normal_style.font.name = "Calibri"
    normal_style.font.size = Pt(11)

    # -------------------------
    # Title
    # -------------------------
    title = doc.add_heading(
        f"MCA Comparison Analysis: {doc_a} vs {doc_b}",
        level=0
    )
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # -------------------------
    # Content generation
    # -------------------------
    for index, json_file in enumerate(json_files):
        # Break between queries only, so the report has no trailing blank page.
        if index:
            doc.add_page_break()

        data = json.loads(json_file.read_text(encoding="utf-8"))

        # Section heading
        doc.add_heading(
            f"{data['query_id']:02d}. {data['title']}",
            level=1
        )

        # Query text
        doc.add_paragraph(
            data["query"],
            style="Intense Quote"
        )

        # Analysis body
        for raw_line in data["analysis"].splitlines():
            line = raw_line.strip()
            if not line:
                continue

            # ALL-CAPS headers such as "LEGAL POSITION:" (kept for older outputs)
            if line.endswith(":") and line.isupper():
                doc.add_heading(line.replace(":", ""), level=2)
                continue

            # Bullet points
            if line.startswith("- "):
                doc.add_paragraph(line[2:].strip(), style="List Bullet")
                continue

            # Title-case section labels produced by the current Agent 2 prompt
            labelled = _split_section_label(line)
            if labelled is not None:
                label, remainder = labelled
                doc.add_heading(label, level=2)
                if remainder:
                    doc.add_paragraph(remainder)
                continue

            # Normal paragraph
            doc.add_paragraph(line)

    # -------------------------
    # Footer disclaimer
    # -------------------------
    section = doc.sections[0]
    footer = section.footer
    footer_para = footer.paragraphs[0]

    footer_para.text = (
        "Disclaimer: This document has been generated using an AI-based "
        "analytical system. The content is intended for research and "
        "decision-support purposes only and should not be treated as legal advice."
    )
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Assigning .text creates the run(s) that are styled here.
    for run in footer_para.runs:
        run.font.name = "Calibri"
        run.font.size = Pt(8)
        run.font.italic = True

    # -------------------------
    # Save document
    # -------------------------
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    print(f"DOCX saved: {output_path}")


In [35]:
# The report is written directly under COMPARE_ROOT, next to the per-pair
# JSON folder it is built from.
OUTPUT_DOCX = COMPARE_ROOT.joinpath(f"{DOC_A}_&_{DOC_B}_compare.docx")

generate_comparison_docx(COMPARE_DIR, OUTPUT_DOCX, DOC_A, DOC_B)


DOCX saved: doc_compare/Adani  Mundra Port agreement for operations_&_Bahuli_IOCL_compressed_compare.docx


In [32]:
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
# =========================
# ADD FOOTER DISCLAIMER
# =========================


def add_footer_disclaimer(doc):
    """Write the centred disclaimer into the footer of *doc*'s first section.

    This logic used to run at cell level against a global ``doc``, which does
    not exist in this notebook (the document object is local to
    ``generate_comparison_docx``, which already writes the same footer), so
    the cell raised ``NameError``. It is kept as a helper for documents built
    elsewhere.

    Args:
        doc: A python-docx document object. It is modified in place; the
            caller is responsible for saving it afterwards.
    """
    footer_para = doc.sections[0].footer.paragraphs[0]

    footer_para.text = (
        "Disclaimer: This document has been generated using an AI-based "
        "analytical system. The content is intended for research and "
        "decision-support purposes only and should not be treated as legal advice."
    )

    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Assigning .text creates the run(s) that are styled here.
    for run in footer_para.runs:
        run.font.name = "Calibri"
        run.font.size = Pt(8)
        run.font.italic = True
