In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import json
from ingestion.md_loader import load_md
from ingestion.splitter import split_contract
from vectorstore.chroma_store import add_to_chroma
from rag.extractor import extract_contract_pricing
from rules.pricing_comparator import detect_leakage
from rag.retriever import retrieve_contract_context

In [3]:
# INGEST CONTRACT: load the contract markdown and split it into Document chunks.
# NOTE(review): this path starts with "Data/..." while later cells use "data/..." —
# confirm the directory casing on case-sensitive filesystems.
text = load_md("Data/Axis_bank/Axis_COM_2025_001.md")
docs = split_contract(text, "Axis_COM_2025_001.md")
# Disabled in this cell: the chunks are not passed to add_to_chroma here.
# add_to_chroma(docs)



In [4]:
docs

[Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract', 'section': 'general'}, page_content='COMMERCIAL BANKING SERVICES AGREEMENT Agreement ID: Axis/COM/2025/001 Bank: Axis Bank Limited Client: Bajaj Auto Limited Registration: CIN: L35911MH1945PLC004103 Effective Date: February 3, 2025 Expiry Date: February 03, 2027 Duration: 2 Year(s) Total Pages: 26 28 PARTIES AND AGREEMENT OVERVIEW BANK: Axis Bank Limited Limited, Mumbai, India (Licensed by Reserve Bank of India) CLIENT: Bajaj Auto Limited, Pune, India (Registration: CIN: L35911MH1945PLC004103) This Commercial Banking Services Agreement ("Agreement") establishes comprehensive banking relationship between the Bank and Client covering deposit accounts, payment processing, international transfers, trade finance, and working capital facilities.'),
 Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract', 'section': 'general'}, page_content='1. INTRODUCTION AND BANKING RELATIONSHIP FRAMEWORK 1.1

In [5]:
# Prompt template asking for a one-word PRICING_RELATED / NON_PRICING verdict on a
# piece of contract text. `{text}` is the only placeholder and is filled in with
# str.format() by classify_chunk, so any literal braces added to this template
# later must be doubled ({{ and }}).
CLASSIFY_PRICING_PROMPT = """
You are a contract analysis assistant.

Your task is to classify the following contract text into ONE category.

CATEGORIES:
- PRICING_RELATED
- NON_PRICING

PRICING_RELATED if the text contains ANY of the following:
- fees, charges, costs, pricing
- monetary amounts (₹, $, INR, USD, etc.)
- rates or percentages (interest, escalation, markup, penalty)
- service descriptions that mention prices or rates
- credit facilities with interest rates or fees
- SLA clauses that include financial penalties or credits
- payment terms, minimum fees, late payment interest

NON_PRICING if the text:
- contains NO prices, fees, rates, or monetary terms
- is purely legal, descriptive, operational, or procedural

RULES:
- Ignore section titles; judge ONLY by content.
- If even ONE price or rate appears → PRICING_RELATED.
- Do NOT extract data.
- Do NOT explain reasoning.
- Respond with ONLY ONE word.

TEXT:
<<<
{text}
>>>

Answer:
"""


In [6]:
import ollama

def classify_chunk(text: str, model: str = "llama3.1") -> str:
    """Classify a piece of contract text as pricing-related or not via an Ollama model.

    Args:
        text: Contract text to classify; substituted into CLASSIFY_PRICING_PROMPT.
        model: Name of the Ollama model to query.

    Returns:
        "PRICING_RELATED" or "NON_PRICING" when exactly one of those labels can be
        recognised in the model's reply (case-insensitively, tolerating extra
        punctuation or quotes). Otherwise the reply is returned with only the
        surrounding whitespace stripped, so unexpected answers stay visible.
    """
    prompt = CLASSIFY_PRICING_PROMPT.format(text=text)

    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # temperature 0 to minimise sampling randomness between runs
        options={"temperature": 0},
    )

    raw_label = response["message"]["content"].strip()

    # The prompt asks for a single word, but a reply may still come back in lower
    # case or with trailing punctuation. Normalise only when the answer is
    # unambiguous; never guess between the two labels.
    upper_label = raw_label.upper()
    recognised = [
        known for known in ("PRICING_RELATED", "NON_PRICING") if known in upper_label
    ]
    if len(recognised) == 1:
        return recognised[0]
    return raw_label


In [11]:
# Fetch the stored chunks for this contract and join their text, separated by
# blank lines, into a single context string for inspection.
docs = retrieve_contract_context("Axis_COM_2025_001.md")
context = "\n\n".join([chunk.page_content for chunk in docs])
context

"automatically initiates payments on scheduled dates without requiring separate authorization for each transaction. 2.3 International Payment Services and Cross Border Transfers SWIFT Wire Transfers: Bajaj Auto Limited provides remittance details including beneficiary name, SWIFT code, account number, amount, and currency. Bank verifies details for FATCA compliance, prepares SWIFT message in MT103 format, transmits through SWIFT network, and provides settlement confirmation. FATCA Compliance: Bank conducts FATCA screening by cross referencing beneficiary against OFAC sanctions lists, verifying tax residency status, and maintaining compliance documentation. Currency Conversion: For international payments requiring currency conversion from INR to foreign currency, Bank applies published\n\ncurrency conversion from INR to foreign currency, Bank applies published daily exchange rate with markup as specified in pricing schedule. Correspondent Charges: Bank recovers charges levied by corresp

In [16]:
docs

[Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract'}, page_content='COMMERCIAL BANKING SERVICES AGREEMENT Agreement ID: Axis/COM/2025/001 Bank: Axis Bank Limited Client: Bajaj Auto Limited Registration: CIN: L35911MH1945PLC004103 Effective Date: February 3, 2025 Expiry Date: February 03, 2027 Duration: 2 Year(s) Total Pages: 26 28 PARTIES AND AGREEMENT OVERVIEW BANK: Axis Bank Limited Limited, Mumbai, India (Licensed by Reserve Bank of India) CLIENT: Bajaj Auto Limited, Pune, India (Registration: CIN: L35911MH1945PLC004103) This Commercial Banking Services Agreement ("Agreement") establishes comprehensive banking relationship between the Bank and Client covering deposit accounts, payment processing, international transfers, trade finance, and working capital facilities. 1. INTRODUCTION AND BANKING RELATIONSHIP FRAMEWORK 1.1 Nature of Agreement and Scope of'),
 Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract'}, page_content='AND BANKIN

In [9]:
# Run the pricing extractor (imported from rag.extractor) for this contract and display the result.
# NOTE(review): the recorded output of this cell is a free-text reply saying no contract
# text was provided, not structured pricing items — confirm the extractor receives context.
extracted = extract_contract_pricing("Axis_COM_2025_001.md")
extracted

"However, I don't see the contract text provided in the context. Please provide it so I can extract the pricing terms according to the rules specified.\n\nOnce you provide the contract text, I'll proceed with extracting the pricing terms and formatting them into a JSON array as per the output format rules.\n\nPlease paste the contract text in the format <<context>> is currently occupying."

In [16]:
import json
from pathlib import Path

def load_standard_pricing(path="data/standard_pricing.json"):
    """Load the standard pricing catalog from a JSON file.

    Args:
        path: Location of the JSON file (string or path-like).

    Returns:
        The parsed JSON content, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Standard pricing file not found: {path}")

    # Decode explicitly as UTF-8: relying on the platform default encoding can fail
    # or garble non-ASCII content (for example currency symbols) on some systems.
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


In [17]:

def get_standard_service_names():
    """Return the top-level keys of the standard pricing catalog as a list."""
    return [*load_standard_pricing().keys()]


In [18]:
# Assign the list in this cell so that displaying it does not depend on a later
# cell having been run first (on a fresh kernel the bare name raises NameError).
STANDARD_SERVICES = get_standard_service_names()
STANDARD_SERVICES

['Account Maintenance',
 'Monthly Reconciliation Report',
 'Consolidated Reporting',
 'ACH Origination Fee',
 'ACH Transaction Fee',
 'RTGS Processing',
 'NEFT Processing',
 'Domestic Wire Transfer',
 'International Wire Transfer',
 'FATCA Compliance Screening',
 'Currency Conversion',
 'Annual Facility Review Charge',
 'Documentation/Agreement Preparation',
 'Disbursement Fee',
 'Facility Enhancement',
 'Online Banking Portal Access',
 'API Integration Setup']

In [19]:
# Service names taken from the keys of the standard pricing catalog.
STANDARD_SERVICES = get_standard_service_names()


In [20]:
def compare_with_standard(extracted_items, standard_catalog):
    """Flag extracted contract prices that differ from the standard catalog price.

    Args:
        extracted_items: Iterable of dicts; each is read for the keys
            ``standard_service_name`` and ``contract_price``.
        standard_catalog: Mapping from standard service name to a dict that is
            read for the keys ``price``, ``unit``, ``currency`` and
            ``escalation_rate``.

    Returns:
        A list with one new dict per abnormal item, in input order. Each dict
        holds the item's own fields plus ``standard_price``, ``standard_unit``,
        ``standard_currency``, ``standard_escalation_rate``, ``price_delta``
        (contract price minus standard price) and ``leakage_status``
        (``"BELOW_STANDARD_PRICE"`` or ``"ABOVE_STANDARD_PRICE"``).

        Items are left out when they have no standard service name, when the
        name is absent from the catalog, when either price is missing, or when
        the contract price equals the standard price.

    The input items are never modified, so the function can be re-run on the
    same extraction result without accumulating comparison fields on it.
    """
    abnormalities = []

    for item in extracted_items:
        standard_name = item.get("standard_service_name")

        # Extras / non-standard services have no catalog counterpart to compare with.
        if standard_name is None:
            continue

        std = standard_catalog.get(standard_name)

        # Name not present in the catalog (or mapped to an empty entry).
        if not std:
            continue

        contract_price = item.get("contract_price")
        standard_price = std.get("price")

        # Nothing to compare when either side has no price.
        if contract_price is None or standard_price is None:
            continue

        # Only deviations are reported; a price matching the standard is ignored.
        if contract_price < standard_price:
            leakage_status = "BELOW_STANDARD_PRICE"
        elif contract_price > standard_price:
            leakage_status = "ABOVE_STANDARD_PRICE"
        else:
            continue

        # Build a fresh dict instead of item.update(...) so the caller's
        # extracted items stay exactly as they were passed in.
        abnormalities.append({
            **item,
            "standard_price": standard_price,
            "standard_unit": std.get("unit"),
            "standard_currency": std.get("currency"),
            "standard_escalation_rate": std.get("escalation_rate"),
            "price_delta": contract_price - standard_price,
            "leakage_status": leakage_status,
        })

    return abnormalities


In [21]:
# Load the standard catalog that the contract prices are compared against.
standard_catalog = load_standard_pricing("data/standard_pricing.json")

# extracted = result of your extraction + post-processing steps
# NOTE(review): `extracted_11` is not assigned in any cell visible here — confirm which
# cell defines it; otherwise this line raises NameError on a fresh kernel.
leakage_results = compare_with_standard(extracted_11, standard_catalog)

In [22]:
leakage_results

[{'service': 'ACH Origination Fee',
  'standard_service_name': 'ACH Origination Fee',
  'contract_price': 400,
  'unit': 'per ACH file submitted',
  'source_clause': 'The Bank charges 400 rupees per ACH file submitted for processing.',
  'unit_inferred': False,
  'applicability': None,
  'standard_price': 500,
  'standard_unit': 'per_file',
  'standard_currency': 'INR',
  'standard_escalation_rate': 3.5,
  'price_delta': -100,
  'leakage_status': 'BELOW_STANDARD_PRICE'},
 {'service': 'ACH Transaction Fee',
  'standard_service_name': 'ACH Transaction Fee',
  'contract_price': 2.0,
  'unit': 'per individual transaction processed within an ACH file',
  'source_clause': 'In addition to the file origination fee, the Bank charges 2.00 rupees per individual transaction processed within an ACH file.',
  'unit_inferred': False,
  'applicability': None,
  'standard_price': 2.5,
  'standard_unit': 'per_transaction',
  'standard_currency': 'INR',
  'standard_escalation_rate': 3.5,
  'price_delta':

In [1]:


# # INGEST CONTRACT
# text = load_md("data/contract/ZCB_COM_2025_002.md")
# docs = split_contract(text, "ZCB_COM_2025_002.md")
# add_to_chroma(docs)

# # EXTRACT PRICING
# extracted = extract_contract_pricing("ZCB_COM_2025_002.md")
# extracted

In [3]:
# INGEST CONTRACT: load the contract markdown, split it, and pass the chunks to add_to_chroma.
text = load_md("data/contract/ZCB_COM_2025_001.md")
docs = split_contract(text, "ZCB_COM_2025_001.md")
# NOTE(review): re-running this cell calls add_to_chroma again with the same chunks —
# confirm the store does not accumulate duplicates.
add_to_chroma(docs)

# EXTRACT PRICING: run the extractor for this contract and display what it returns.
extracted = extract_contract_pricing("ZCB_COM_2025_001.md")
extracted

[{'service': 'ACH Origination',
  'contract_price': 400,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.8 Payment Services'},
 {'service': 'ACH Transaction',
  'contract_price': 2.5,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.8 Payment Services'},
 {'service': 'RTGS Processing',
  'contract_price': 50,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.8 Payment Services'},
 {'service': 'NEFT Processing',
  'contract_price': 25,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.8 Payment Services'},
 {'service': 'Domestic Wire Transfer',
  'contract_price': 100,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.8 Payment Services'},
 {'service': 'ACH via API',
  'contract_price': 25000,
  'unit': 'rupee',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3

In [2]:
# The same name is imported in the notebook's import cell near the top; presumably
# repeated here so this cell can run on its own after a kernel restart.
from rag.extractor import extract_contract_pricing
# Run the extractor for contract ZCB_COM_2025_004 and display the result.
extract_contract_pricing("ZCB_COM_2025_004.md")

  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
  return Chroma(


[{'service': 'ACH Origination',
  'contract_price': 500,
  'unit': 'rupees',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.3.1 Domestic Payment Services'},
 {'service': 'ACH Transaction',
  'contract_price': 2.5,
  'unit': 'rupees',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': '3.3.1 Domestic Payment Services'},
 {'service': 'RTGS Processing Fee',
  'contract_price': 50,
  'unit': 'rupees',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': 'Domestic Payment Services'},
 {'service': 'NEFT Processing Fee',
  'contract_price': 25,
  'unit': 'rupees',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': 'Domestic Payment Services'},
 {'service': 'Domestic Wire Transfer Fee',
  'contract_price': 100,
  'unit': 'rupees',
  'minimum_fee': None,
  'escalation_rate': None,
  'source_clause': 'Domestic Payment Services'},
 {'service': 'International Wire Transfer Fee (SWIFT)',
  'contract_price': 850,
  'unit': 