# last 

In [1]:
CLASSIFY_PRICING_PROMPT = """
You are a contract analysis assistant.

Your task is to classify the following contract text into ONE category.

CATEGORIES:
- PRICING_RELATED
- NON_PRICING

PRICING_RELATED if the text contains ANY of the following:
- fees, charges, costs, pricing
- monetary amounts (₹, $, INR, USD, etc.)llama3.1
- rates or percentages (interest, escalation, markup, penalty)
- service descriptions that mention prices or rates
- credit facilities with interest rates or fees
- SLA clauses that include financial penalties or credits
- payment terms, minimum fees, late payment interest

NON_PRICING if the text:
- contains NO prices, fees, rates, or monetary terms
- is purely legal, descriptive, operational, or procedural

RULES:
- Ignore section titles; judge ONLY by content.
- If even ONE price or rate appears → PRICING_RELATED.
- Do NOT extract data.
- Do NOT explain reasoning.
- Respond with ONLY ONE word.

TEXT:
<<<
{text}
>>>

Answer:
"""


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import ollama

def classify_chunk(text: str, model: str = "llama3.1:8b") -> str:
    prompt = CLASSIFY_PRICING_PROMPT.format(text=text)

    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": 0}
    )

    label = response["message"]["content"].strip()
    return label


In [4]:
from langchain_core.documents import Document

def classify_documents(docs, model="llama3.1:8b"):
    classified_docs = []

    for doc in docs:
        label = classify_chunk(doc.page_content, model=model)

        # Defensive check
        if label not in {"PRICING_RELATED", "NON_PRICING"}:
            label = "NON_PRICING"

        doc.metadata["pricing_class"] = label
        classified_docs.append(doc)

    return classified_docs


In [5]:
def get_pricing_chunks(docs):
    return [
        d for d in docs
        if d.metadata.get("pricing_class") == "PRICING_RELATED"
    ]


In [1]:
from ingestion.splitter import split_contract
from ingestion.md_loader import load_md

# docs = output from your splitter
text = load_md("Data/Axis_bank/Axis_COM_2025_001.md")
docs = split_contract(text, "Data/Axis_bank/Axis_COM_2025_001.md")

In [2]:
docs

[Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract', 'section_number': None, 'pricing_class': None}, page_content='COMMERCIAL BANKING SERVICES AGREEMENT Agreement ID: Axis/COM/2025/001 Bank: Axis Bank Limited Client: Bajaj Auto Limited Registration: CIN: L35911MH1945PLC004103 Effective Date: February 3, 2025 Expiry Date: February 03, 2027 Duration: 2 Year(s) Total Pages: 26 28 PARTIES AND AGREEMENT OVERVIEW BANK: Axis Bank Limited Limited, Mumbai, India (Licensed by Reserve Bank of India) CLIENT: Bajaj Auto Limited, Pune, India (Registration: CIN: L35911MH1945PLC004103) This Commercial Banking Services Agreement ("Agreement") establishes comprehensive banking relationship between the Bank and Client covering deposit accounts, payment processing, international transfers, trade finance, and working capital facilities.'),
 Document(metadata={'source': 'Axis_COM_2025_001.md', 'doc_type': 'contract', 'section_number': 1, 'pricing_class': None}, page_content='1. INTR

In [8]:
docs = classify_documents(docs)

In [9]:
from vectorstore.chroma_store import add_to_chroma

In [10]:
check th

  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
  return Chroma(
  db.persist()


In [11]:
from rag.extractor import extract_contract_pricing
from rag.prompts import CONTRACT_EXTRACTION_PROMPT

In [12]:
from rag.retriever import get_pricing_chunks
docs=get_pricing_chunks("Axis_COM_2025_001.md")
context = "\n\n".join(
        f"[SECTION {d.metadata.get('section_number')}]\n{d.page_content}"
        for d in docs
    )
prompt = CONTRACT_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

In [13]:
prompt

'\nYou are a senior financial contract analyst AI.\n\nYour task is to EXTRACT ALL PRICING-RELATED TERMS from the contract text.\n\nPRICING TERMS INCLUDE (extract ALL that apply):\n- Flat fees (monthly, annual, one-time, per transaction, per report, etc.)\n- Percentage-based rates (interest, FX markup, escalation, penalties, handling charges)\n- Minimum fees\n- Conditional pricing (discounts, reductions, free allowances, frozen pricing)\n- Credit facility charges (review fees, minimum utilization fees)\n- SLA-linked service credits or penalties\n- Escalation clauses (annual or periodic)\n\nDO NOT restrict extraction to any specific section number.\nPricing terms may appear in:\n- Pricing / Fee sections\n- Credit facility sections\n- Payment terms\n- SLA sections\n- Special conditions / anomalies\n- Embedded sentences\n\nSTRICT RULES (MANDATORY):\n1. Extract ONLY values explicitly stated in the contract.\n2. DO NOT infer, calculate, normalize, or estimate.\n3. If a service matches a STAN

In [20]:
import ollama
response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        options={
        "temperature": 0.0,
        "num_ctx": 4096,
        "num_predict": 700,
        "top_p": 0.9,
        "repeat_penalty": 1.1
    },
        stream=False,
    )

In [22]:
response["message"]["content"].strip()

'Here are the extracted pricing-related terms in JSON format:\n\n```\n[\n  {\n    "service": "Currency Conversion",\n    "standard_service_name": null,\n    "contract_price": null,\n    "unit": null,\n    "rate": 0.75,\n    "rate_unit": "%",\n    "conditions": "published daily exchange rate with markup as specified in pricing schedule",\n    "source_clause": "[SECTION 2]"\n  },\n  {\n    "service": "Correspondent Charges",\n    "standard_service_name": null,\n    "contract_price": null,\n    "unit": null,\n    "rate": 0.08,\n    "rate_unit": "%",\n    "conditions": "Actual + 8% handling",\n    "source_clause": "[SECTION 2]"\n  },\n  {\n    "service": "LC Issuance (up to 180 days)",\n    "standard_service_name": null,\n    "contract_price": 18000,\n    "unit": null,\n    "rate": null,\n    "rate_unit": null,\n    "conditions": null,\n    "source_clause": "[SECTION 3]"\n  },\n  {\n    "service": "LC Amendment",\n    "standard_service_name": null,\n    "contract_price": 6500,\n    "unit":

In [1]:
from rag.extractor import extract_contract_pricing

result = extract_contract_pricing("Axis_COM_2025_001.md")

  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
  return Chroma(


In [2]:
import json 
json.loads(result)

[{'service': 'Account Maintenance',
  'price': 3500,
  'source_clause': 'Service | Fee | | | | Account Maintenance (Monthly) | ₹3,500'},
 {'service': 'Daily Statement (Email)',
  'price': 250,
  'rate_unit': 'statement',
  'source_clause': 'Service | Fee | | | | Daily Statement (Email) | ₹250 per statement'},
 {'service': 'Monthly Reconciliation',
  'price': 1200,
  'rate_unit': 'report',
  'source_clause': 'Service | Fee | | | | Monthly Reconciliation | ₹1,200 per report'},
 {'service': 'Custom MIS Reports',
  'price': 2000,
  'rate_unit': 'report',
  'source_clause': 'Service | Fee | | | | Custom MIS Reports | ₹2,000 per report'},
 {'service': 'Cash Deposit',
  'price': 5,
  'rate_unit': '₹1,000',
  'source_clause': 'Service | Fee | | | | Cash Deposit | ₹5 per ₹1,000 (min ₹100)'},
 {'service': 'Cheque Deposit',
  'price': 25,
  'rate_unit': 'cheque',
  'source_clause': 'Service | Fee | | | | Cheque Deposit | ₹25 per cheque'},
 {'service': 'ACH File Origination',
  'price': 650,
  'so

In [17]:
import json

CANONICAL_KEYS = {
    "service": None,
    "price": None,
    "rate": None,
    "rate_unit": None,
    "source_clause": None
}

def canonicalize_pricing_json(raw_text: str):
    data = json.loads(raw_text)

    normalized = []
    for row in data:
        fixed = CANONICAL_KEYS.copy()
        fixed.update(row)  # overwrite defaults with actual values
        normalized.append(fixed)

    return json.dumps(normalized, indent=2)


In [18]:
import json

REQUIRED_KEYS = {"service", "price", "rate", "rate_unit", "source_clause"}

def validate_pricing_output(raw_text: str):
    try:
        data = json.loads(raw_text)
    except Exception as e:
        raise ValueError(f"❌ Invalid JSON: {e}")

    if not isinstance(data, list):
        raise ValueError("❌ Output must be a JSON array")

    seen = set()
    errors = []

    for i, row in enumerate(data):
        if not isinstance(row, dict):
            errors.append(f"Row {i}: Not an object")
            continue

        missing = REQUIRED_KEYS - row.keys()
        if missing:
            errors.append(f"Row {i}: Missing keys {missing}")

        if row.get("price") is None and row.get("rate") is None:
            errors.append(f"Row {i}: Both price and rate are null")

        key = (row.get("service"), row.get("price"), row.get("rate"))
        if key in seen:
            errors.append(f"Row {i}: Duplicate pricing entry")
        seen.add(key)

    if errors:
        raise ValueError("❌ Validation failed:\n" + "\n".join(errors))

    return data


In [19]:
def deduplicate_pricing_rows(data):
    seen = set()
    unique_rows = []

    for row in data:
        key = (
            row.get("service"),
            row.get("price"),
            row.get("rate"),
            row.get("rate_unit"),
        )

        if key in seen:
            continue

        seen.add(key)
        unique_rows.append(row)

    return unique_rows


In [21]:
# 1. Canonicalize (add missing keys)
canonical_json = canonicalize_pricing_json(result)

# 2. Load JSON
data = json.loads(canonical_json)

# 3. Deduplicate
data = deduplicate_pricing_rows(data)

# 4. Dump back to JSON
final_json = json.dumps(data)

# 5. Validate
validated = validate_pricing_output(final_json)

print("✅ Pricing JSON is valid, canonical, and deduplicated")

✅ Pricing JSON is valid, canonical, and deduplicated


In [22]:
validated

[{'service': 'Currency Conversion Markup',
  'price': None,
  'rate': 0.75,
  'rate_unit': '%',
  'source_clause': 'Bank applies published daily exchange rate with markup as specified in pricing schedule.'},
 {'service': 'Correspondent Charges',
  'price': None,
  'rate': 0.08,
  'rate_unit': '',
  'source_clause': "Bank recovers charges levied by correspondent banks in destination countries, either deducting from remittance amount or charging separately based on Bajaj Auto Limited's instruction."},
 {'service': 'LC Issuance',
  'price': 16000,
  'rate': None,
  'rate_unit': '',
  'source_clause': 'Bajaj Auto Limited applies for LC with beneficiary, amount, validity, and required documents. Bank verifies creditworthiness, prepares LC document in UCP 600 format, and transmits via SWIFT to advising bank.'},
 {'service': 'LC Amendment',
  'price': 6500,
  'rate': None,
  'rate_unit': '',
  'source_clause': 'If Bajaj Auto Limited requests LC amendment during validity (increasing amount, ex

# new

In [1]:
from rag.retriever import retrieve_contract_context,get_pricing_chunks
import ollama
import json

In [2]:
docs = get_pricing_chunks("Axis_COM_2025_001.md")

  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
  return Chroma(


In [3]:
context = "\n\n".join(
        f"[SECTION {d.metadata.get('section_number')}]\n{d.page_content}"
        for d in docs
    )

In [28]:
CASH_MANAGEMENT_SERVICES_EXTRACTION_PROMPT= """
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY Cash Management service prices from the contract text.

IMPORTANT RULES:
- Extract ONLY explicitly stated prices
- DO NOT assume, infer, or calculate anything
- Ignore all non–cash management services
- If a service is not priced → price = null

CASH MANAGEMENT SERVICES (ONLY THESE):

- Account Maintenance → key: account_maintenance
- Daily Statement (Email) → key: daily_statement_email
- Monthly Reconciliation → key: monthly_reconciliation
- Custom MIS Reports → key: custom_reports

OUTPUT FORMAT (STRICT):

{
  "account_maintenance": {
    "price": number | null,
    "unit": "per_month",
    "source_clause": string | null
  },
  "daily_statement_email": {
    "price": number | null,
    "unit": "per_statement",
    "source_clause": string | null
  },
  "monthly_reconciliation": {
    "price": number | null,
    "unit": "per_report",
    "source_clause": string | null
  },
  "custom_reports": {
    "price": number | null,
    "unit": "per_report",
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- No extra services
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>
"""
prompt = CONTRACT_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

NameError: name 'CONTRACT_EXTRACTION_PROMPT' is not defined

In [33]:
prompt

'\nOUTPUT MUST BE VALID JSON ONLY.\nNO prose. NO markdown.\n\nROLE:\nYou are a contract pricing analyst.\n\nTASK:\nExtract ONLY Cash Management service prices from the contract text.\n\nIMPORTANT RULES:\n- Extract ONLY explicitly stated prices\n- DO NOT assume, infer, or calculate anything\n- Ignore all non–cash management services\n- If a service is not priced → price = null\n\nCASH MANAGEMENT SERVICES (ONLY THESE):\n\n- Account Maintenance → key: account_maintenance\n- Daily Statement (Email) → key: daily_statement_email\n- Monthly Reconciliation → key: monthly_reconciliation\n- Custom MIS Reports → key: custom_reports\n\nOUTPUT FORMAT (STRICT):\n\n{\n  "account_maintenance": {\n    "price": number | null,\n    "unit": "per_month",\n    "source_clause": string | null\n  },\n  "daily_statement_email": {\n    "price": number | null,\n    "unit": "per_statement",\n    "source_clause": string | null\n  },\n  "monthly_reconciliation": {\n    "price": number | null,\n    "unit": "per_repor

In [34]:
response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )

In [43]:
response["message"]["content"].strip()

'{\n  "account_maintenance": {\n    "price": 3500.0,\n    "unit": "per_month",\n    "source_clause": "SECTION 3 | Service | Fee | | | | Account Maintenance (Monthly) | ₹3,500"\n  },\n  "daily_statement_email": {\n    "price": 250.0,\n    "unit": "per_statement",\n    "source_clause": "SECTION 3 | Service | Fee | | | | Daily Statement (Email) | ₹250 per statement"\n  },\n  "monthly_reconciliation": {\n    "price": 1200.0,\n    "unit": "per_report",\n    "source_clause": "SECTION 3 | Service | Fee | | | | Monthly Reconciliation | ₹1,200 per report"\n  },\n  "custom_reports": {\n    "price": 2000.0,\n    "unit": "per_report",\n    "source_clause": "SECTION 3 | Service | Fee | | | | Custom MIS Reports | ₹2,000 per report"\n  }\n}'

In [44]:
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.loads(json_text)

In [45]:
data['account_maintenance']['price']

3500.0

In [None]:
"cash_management": {
      "account_maintenance": {
        "service_code": "ACCT_MAINT",
        "unit": "per_month",
        "price": 2600,
        "escalation_applicable": truecash 
      },
      "daily_statement_email": {
        "service_code": "DAILY_STMT_EMAIL",
        "unit": "per_statement",
        "price": 120
      },
      "monthly_reconciliation": {
        "service_code": "MONTHLY_RECON",
        "unit": "per_report",
        "price": 700
      },
      "custom_reports": {
        "service_code": "CUSTOM_REPORT",
        "unit": "per_report",
        "price": 1200
      }

In [46]:
DOMESTIC_PAYMENTS_EXTRACTION_PROMPT = """
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY Domestic Payment service prices from the contract text.

IMPORTANT RULES:
- Extract ONLY explicitly stated prices
- DO NOT assume, infer, or calculate anything
- Ignore all non–domestic payment services
- If a price is not explicitly stated → price = null
- Use numbers only (no currency symbols)

DOMESTIC PAYMENT SERVICES (ONLY THESE):

- ACH File Origination → key: ach_file
- ACH Transaction → key: ach_transaction
- RTGS Transaction → key: rtgs
- NEFT Transaction → key: neft
- Cheque Processing / Cheque Clearing → key: cheque_processing
- Standing Instruction Setup → key: standing_instruction.setup_fee
- Standing Instruction Monthly → key: standing_instruction.monthly_fee

OUTPUT FORMAT (STRICT):

{
  "ach_file": {
    "price": number | null,
    "unit": "per_file",
    "source_clause": string | null
  },
  "ach_transaction": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  },
  "rtgs": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  },
  "neft": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  },
  "cheque_processing": {
    "price": number | null,
    "unit": "per_cheque",
    "source_clause": string | null
  },
  "standing_instruction": {
    "setup_fee": {
      "price": number | null,
      "unit": "one_time",
      "source_clause": string | null
    },
    "monthly_fee": {
      "price": number | null,
      "unit": "per_month",
      "source_clause": string | null
    }
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- No extra services
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>
"""


In [None]:
prompt = DOMESTIC_PAYMENTS_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )

In [60]:
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.load(json_text)

In [61]:
data

{'ach_file': {'price': 650,
  'unit': 'per_file',
  'source_clause': 'Section 3, Domestic Payment Processing Charges'},
 'ach_transaction': {'price': 3.5,
  'unit': 'per_transaction',
  'source_clause': 'Section 3, Domestic Payment Processing Charges'},
 'rtgs': {'price': 75,
  'unit': 'per_transaction',
  'source_clause': 'Section 3, Domestic Payment Processing Charges'},
 'neft': {'price': 35,
  'unit': 'per_transaction',
  'source_clause': 'Section 3, Domestic Payment Processing Charges'},
 'cheque_processing': {'price': None,
  'unit': 'per_cheque',
  'source_clause': None},
 'standing_instruction': {'setup_fee': {'price': 500,
   'unit': 'one_time',
   'source_clause': 'Section 3, Domestic Payment Processing Charges'},
  'monthly_fee': {'price': 500,
   'unit': 'per_month',
   'source_clause': 'Section 3, Domestic Payment Processing Charges'}}}

In [70]:
INTERNATIONAL_PAYMENTS_EXTRACTION_PROMPT = """
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY International Payment service prices from the contract text.

IMPORTANT RULES:
- Extract ONLY explicitly stated prices or percentages
- DO NOT assume, infer, normalize, or calculate anything
- Ignore all non–international payment services
- If a price or percentage is not explicitly stated → set it to null
- Use numbers only (no currency symbols, no % sign)
-Percentages MUST be returned as numeric percentages exactly as written (e.g. 0.75% → 0.75, NOT 75 or 750).

INTERNATIONAL PAYMENT SERVICES (ONLY THESE):

- SWIFT Wire Transfer → key: swift_wire
- FATCA Screening → key: fatca_screening
- Correspondent Bank Charges (handling %) → key: correspondent_charges
- FX Conversion Markup → key: fx_markup

OUTPUT FORMAT (STRICT):

{
  "swift_wire": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  },
  "fatca_screening": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  },
  "correspondent_charges": {
    "handling_fee_percent": number | null,
    "unit": "percentage_plus_actual",
    "source_clause": string | null
  },
  "fx_markup": {
    "price": number | null,
    "unit": "percentage",
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- No extra services
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>
"""


In [71]:
prompt = INTERNATIONAL_PAYMENTS_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.load(json_text)

AttributeError: 'str' object has no attribute 'read'

In [72]:
parsed = json.loads(response["message"]["content"].strip())


In [73]:
parsedf

{'swift_wire': {'price': 950,
  'unit': 'per_transaction',
  'source_clause': '3.4 International Payment Charges | Service | Fee | | | | | SWIFT Wire Transfer | ₹950 per transaction |'},
 'fatca_screening': {'price': 100,
  'unit': 'per_transaction',
  'source_clause': '3.4 International Payment Charges | Service | Fee | | | | | FATCA Screening | ₹100 per transaction |'},
 'correspondent_charges': {'handling_fee_percent': 8,
  'unit': 'percentage_plus_actual',
  'source_clause': '3.4 International Payment Charges | Service | Fee | | | | | Correspondent Charges | Actual + 8% handling |'},
 'fx_markup': {'price': 0.75,
  'unit': 'percentage',
  'source_clause': '3.4 International Payment Charges | Service | Fee | | | | | FX Conversion | 0.75% markup on spot rate |'}}

In [11]:
TRADE_FINANCE_EXTRACTION_PROMPT = """
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY Trade Finance service prices from the contract text.

SERVICES TO EXTRACT (ONLY THESE):

- LC Issuance → key: lc_issuance
- LC Amendment → key: lc_amendment
- LC Negotiation → key: lc_negotiation
- LC Discrepancy Handling → key: lc_discrepancy
- Bank Guarantee (1 year + additional year) → key: bank_guarantee

RULES:
- Extract ONLY explicitly stated prices
- DO NOT infer or calculate
- If reduced / special price exists, extract the reduced price
- If additional year price exists, extract separately
- If not mentioned → price = null

OUTPUT FORMAT (STRICT):

{
  "lc_issuance": {
    "price": number | null,
    "unit": "per_lc",
    "source_clause": string | null
  },
  "lc_amendment": {
    "price": number | null,
    "unit": "per_amendment",
    "source_clause": string | null
  },
  "lc_negotiation": {
    "price": number | null,
    "unit": "per_document_set",
    "source_clause": string | null
  },
  "lc_discrepancy": {
    "price": number | null,
    "unit": "per_lc",
    "source_clause": string | null
  },
  "bank_guarantee": {
    "price": number | null,
    "unit": "per_year",
    "additional_year_price": number | null,
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- No extra services
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>
"""


In [None]:
prompt = TRADE_FINANCE_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.loads(response["message"]["content"].strip())
data

In [14]:
prompt



'\nOUTPUT MUST BE VALID JSON ONLY.\nNO prose. NO markdown.\n\nROLE:\nYou are a contract pricing analyst.\n\nTASK:\nExtract ONLY Trade Finance service prices from the contract text.\n\nSERVICES TO EXTRACT (ONLY THESE):\n\n- LC Issuance → key: lc_issuance\n- LC Amendment → key: lc_amendment\n- LC Negotiation → key: lc_negotiation\n- LC Discrepancy Handling → key: lc_discrepancy\n- Bank Guarantee (1 year + additional year) → key: bank_guarantee\n\nRULES:\n- Extract ONLY explicitly stated prices\n- DO NOT infer or calculate\n- If reduced / special price exists, extract the reduced price\n- If additional year price exists, extract separately\n- If not mentioned → price = null\n\nOUTPUT FORMAT (STRICT):\n\n{\n  "lc_issuance": {\n    "price": number | null,\n    "unit": "per_lc",\n    "source_clause": string | null\n  },\n  "lc_amendment": {\n    "price": number | null,\n    "unit": "per_amendment",\n    "source_clause": string | null\n  },\n  "lc_negotiation": {\n    "price": number | null,\

In [15]:
DIGITAL_SERVICESEXTRACTION_PROMPT= """
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY Digital Services prices from the contract text.

SERVICES TO EXTRACT (ONLY THESE):

- Online Portal Access → key: online_portal
- Sweep Account Management → key: sweep_account
- API Setup → key: api_setup
- API Transaction Fee → key: api_transaction

RULES:
- Extract ONLY explicitly stated prices
- Ignore operational descriptions
- If service is frozen or fixed, still extract price
- If not priced → price = null

OUTPUT FORMAT (STRICT):

{
  "online_portal": {
    "price": number | null,
    "unit": "per_month",
    "source_clause": string | null
  },
  "sweep_account": {
    "price": number | null,
    "unit": "per_month",
    "source_clause": string | null
  },
  "api_setup": {
    "price": number | null,
    "unit": "one_time",
    "source_clause": string | null
  },
  "api_transaction": {
    "price": number | null,
    "unit": "per_transaction",
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- No extra services
- Parsable by json.loads()

CONTRACT TEXT:
<<<CONTRACT_TEXT>>
"""


In [16]:
prompt = DIGITAL_SERVICESEXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.loads(response["message"]["content"].strip())
data

{'online_portal': {'price': 1500,
  'unit': 'per_month',
  'source_clause': 'SECTION 3, Online Portal Access'},
 'sweep_account': {'price': 3500,
  'unit': 'per_month',
  'source_clause': 'SECTION 3, Sweep Account Management'},
 'api_setup': {'price': 35000,
  'unit': 'one_time',
  'source_clause': 'SECTION 3, API Setup (One time)'},
 'api_transaction': {'price': 2.5,
  'unit': 'per_transaction',
  'source_clause': 'SECTION 3, API Transaction Fee'}}

In [21]:
CREDIT_FACILITIES_EXTRACTION_PROMPT="""
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY Credit Facility interest rates from the contract text.

SERVICES TO EXTRACT (ONLY THESE):

- Cash Credit Facility → key: cash_credit
- Trade Credit Line → key: trade_credit
- Overdraft Facility → key: overdraft

RULES:
- Extract ONLY explicitly stated interest rates
- If ranges are mentioned, extract min and max
- DO NOT calculate spreads or totals
- Percentages must be numeric (e.g. 10.5, not "10.5%")
-If a single interest rate is stated, set both min and max to the same value.


OUTPUT FORMAT (STRICT):

{
  "cash_credit": {
    "total_rate_percent_range": [number | null, number | null],
    "source_clause": string | null
  },
  "trade_credit": {
    "total_rate_percent_range": [number | null, number | null],
    "source_clause": string | null
  },
  "overdraft": {
    "total_rate_percent_range": [number | null, number | null],
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>
"""

In [22]:
prompt = CREDIT_FACILITIES_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.loads(response["message"]["content"].strip())
data

{'cash_credit': {'total_rate_percent_range': [10.5, 10.5],
  'source_clause': 'Base Rate (8.00%) + Spread (2.50%) = 10.50% per annum'},
 'trade_credit': {'total_rate_percent_range': [10.0, 10.0],
  'source_clause': 'Base Rate (8.00%) + Spread (2.00%) = 10.00% per annum'},
 'overdraft': {'total_rate_percent_range': [11.5, None],
  'source_clause': '11.50% per annum on daily overdrawn balance'}}

In [23]:
prompt

'\nOUTPUT MUST BE VALID JSON ONLY.\nNO prose. NO markdown.\n\nROLE:\nYou are a contract pricing analyst.\n\nTASK:\nExtract ONLY Credit Facility interest rates from the contract text.\n\nSERVICES TO EXTRACT (ONLY THESE):\n\n- Cash Credit Facility → key: cash_credit\n- Trade Credit Line → key: trade_credit\n- Overdraft Facility → key: overdraft\n\nRULES:\n- Extract ONLY explicitly stated interest rates\n- If ranges are mentioned, extract min and max\n- DO NOT calculate spreads or totals\n- Percentages must be numeric (e.g. 10.5, not "10.5%")\n-If a single interest rate is stated, set both min and max to the same value.\n\n\nOUTPUT FORMAT (STRICT):\n\n{\n  "cash_credit": {\n    "total_rate_percent_range": [number | null, number | null],\n    "source_clause": string | null\n  },\n  "trade_credit": {\n    "total_rate_percent_range": [number | null, number | null],\n    "source_clause": string | null\n  },\n  "overdraft": {\n    "total_rate_percent_range": [number | null, number | null],\n  

In [26]:
PROGRAMS_and_DISCOUNTS_EXTRACTION_PROMPT="""
OUTPUT MUST BE VALID JSON ONLY.
NO prose. NO markdown.

ROLE:
You are a contract pricing analyst.

TASK:
Extract ONLY programs or discounts that affect pricing.

RULES:
- Extract ONLY explicit discounts
- Ignore marketing text
- Percentages must be numeric

OUTPUT FORMAT (STRICT):

{
  "early_adopter_program": {
    "duration_months": number | null,
    "discount_percent": number | null,
    "applicable_to": string | null,
    "source_clause": string | null
  }
}

FINAL RULES:
- JSON only
- All keys must exist
- Parsable by json.loads()

CONTRACT TEXT:
<<CONTRACT_TEXT>>

"""

In [27]:
prompt = PROGRAMS_and_DISCOUNTS_EXTRACTION_PROMPT.replace(
        "<<CONTRACT_TEXT>>",
        context
    )

response = ollama.chat(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
import re
json_text = re.search(r'\{[\s\S]*\}', response["message"]["content"].strip()).group(0)

data = json.loads(response["message"]["content"].strip())
data

{'early_adopter_program': {'duration_months': None,
  'discount_percent': None,
  'applicable_to': None,
  'source_clause': None},
 'reduced_lc_fee': {'duration_months': None,
  'discount_percent': 16.0,
  'applicable_to': 'LCs exceeding ₹50 Lakhs',
  'source_clause': 'REDUCED to ₹16,000 (from ₹18,000)'},
 'free_discrepancy_corrections': {'duration_months': None,
  'discount_percent': None,
  'applicable_to': 'First 2 discrepancy corrections per calendar year are PROVIDED FREE',
  'source_clause': None},
 'frozen_online_portal_fee': {'duration_months': None,
  'discount_percent': None,
  'applicable_to': 'Online portal fee FROZEN at ₹1,500 with NO annual escalation for contract term',
  'source_clause': None},
 'fixed_api_transaction_fee': {'duration_months': None,
  'discount_percent': None,
  'applicable_to': 'API transaction fee FIXED at ₹2.50 without annual escalation',
  'source_clause': None}}

In [29]:
for i in (1 ,4,7,9):
    print(i)

1
4
7
9
