In [1]:
import warnings
warnings.filterwarnings("ignore")

import sys
import json
import os
import pandas as pd
import re
from datetime import datetime
from typing import Dict, List, Tuple, Any
import numpy as np

In [2]:
from ingestion.md_loader import load_md


In [36]:
text = load_md("Data/Axis_bank/Axis_COM_2025_001.md")

In [35]:
def extract_by_section_names(text, section_names):
    extracted = {}

    for i, name in enumerate(section_names):
        start = text.lower().find(name.lower())
        if start == -1:
            extracted[name] = ""
            continue

        end = len(text)
        for next_name in section_names[i+1:]:
            pos = text.lower().find(next_name.lower(), start + 1)
            if pos != -1:
                end = pos
                break

        extracted[name] = text[start:end].strip()

    return extracted


In [29]:

class AdvancedContractLoader:
    """Load and parse complex multi-bank contracts"""
    
    def __init__(self, contract_path: str):
        self.contract_path = contract_path
        self.raw_text = None
        self.sections = {}
        self.contract_id = os.path.basename(contract_path).replace('.md', '')
        
    def load(self) -> str:
        """Load contract file"""
        try:
            with open(self.contract_path, 'r', encoding='utf-8') as f:
                self.raw_text = f.read()
            print(f"✅ Loaded: {self.contract_id}")
            return self.raw_text
        except Exception as e:
            print(f"❌ Error loading {self.contract_path}: {e}")
            return None
    
    def extract_sections(self) -> Dict[str, str]:
        """Extract major contract sections"""
        sections = {
            'introduction': self._extract_section(1),
            'services': self._extract_section(2),
            'pricing': self._extract_section(3),  # ⭐ KEY SECTION
            'credit_facilities': self._extract_section(4),
            'sla': self._extract_section(5),
            'representations': self._extract_section(6),
            'liability': self._extract_section(7),
            'confidentiality': self._extract_section(8),
            'termination': self._extract_section(9),
            'amendments': self._extract_section(10),
        }
        self.sections = sections
        return sections
    
    def _extract_section(self, section_num: int) -> str:
        if not self.raw_text:
            return ""
    
        pattern = rf'''
            (##+\s*{section_num}\.?.*?)      # Section header
            (?=\n##+\s*\d+\.|\Z)             # Stop at next section or EOF
        '''
    
        match = re.search(pattern, self.raw_text, re.DOTALL | re.VERBOSE)
        return match.group(1).strip() if match else ""



In [30]:
pipeline= AdvancedContractLoader("Data/Axis_bank/Axis_COM_2025_001.md")

In [31]:
pipeline.load()
pipeline.extract_sections()

✅ Loaded: Axis_COM_2025_001


error: missing ), unterminated subpattern at position 63 (line 3, column 13)

In [32]:
pipeline._extract_section(3)

error: missing ), unterminated subpattern at position 63 (line 3, column 13)

In [None]:
{
  "catalogue_metadata": {
    "bank_name": "Axis Bank Limited",
    "business_line": "Commercial Banking",
    "regulator": "Reserve Bank of India",
    "catalogue_version": "Axis-COM-2025",
    "effective_date": "2025-01-01",
    "currency": "INR",
    "gst_applicable": true,
    "gst_rate_percent": 18,
    "pricing_strategy": "growth_oriented_aggressive",
    "annual_escalation_percent": 3.5
  },

  "pricing_categories": {
    "cash_management": {
      "account_maintenance": {
        "service_code": "ACCT_MAINT",
        "unit": "per_month",
        "price": 2600,
        "escalation_applicable": truecash 
      },
      "daily_statement_email": {
        "service_code": "DAILY_STMT_EMAIL",
        "unit": "per_statement",
        "price": 120
      },
      "monthly_reconciliation": {
        "service_code": "MONTHLY_RECON",
        "unit": "per_report",
        "price": 700
      },
      "custom_reports": {
        "service_code": "CUSTOM_REPORT",
        "unit": "per_report",
        "price": 1200
      }
    },

    "domestic_payments": {
      "ach_file": {
        "service_code": "ACH_FILE",
        "unit": "per_file",
        "price": 450
      },
      "ach_transaction": {
        "service_code": "ACH_TXN",
        "unit": "per_transaction",
        "price": 2.5
      },
      "rtgs": {
        "service_code": "RTGS",
        "unit": "per_transaction",
        "price": 45
      },
      "neft": {
        "service_code": "NEFT",
        "unit": "per_transaction",
        "price": 22
      },
      "cheque_processing": {
        "service_code": "CHEQUE",
        "unit": "per_cheque",
        "price": 30
      },
      "standing_instruction": {
        "setup_fee": {
          "unit": "one_time",
          "price": 300
        },
        "monthly_fee": {
          "unit": "per_month",
          "price": 300
        }
      }
    },

    "international_payments": {
      "swift_wire": {
        "service_code": "SWIFT",
        "unit": "per_transaction",
        "price": 800
      },
      "fatca_screening": {
        "service_code": "FATCA",
        "unit": "per_transaction",
        "price": 60
      },
      "correspondent_charges": {
        "service_code": "CORR_BANK",
        "unit": "percentage_plus_actual",
        "handling_fee_percent": 5.5
      },
      "fx_markup": {
        "service_code": "FX_MARKUP",
        "unit": "percentage",
        "price": 0.7
      }
    },

    "trade_finance": {
      "lc_issuance": {
        "service_code": "LC_ISSUE",
        "unit": "per_lc",
        "price": 13500
      },
      "lc_amendment": {
        "service_code": "LC_AMEND",
        "unit": "per_amendment",
        "price": 4500
      },
      "lc_negotiation": {
        "service_code": "LC_NEG",
        "unit": "per_document_set",
        "price": 3000
      },
      "lc_discrepancy": {
        "service_code": "LC_DISC",
        "unit": "per_lc",
        "price": 3500
      },
      "bank_guarantee": {
        "service_code": "BG",
        "unit": "per_year",
        "price": 11000,
        "additional_year_price": 2200
      }
    },

    "digital_services": {
      "online_portal": {
        "service_code": "PORTAL",
        "unit": "per_month",
        "price": 1000
      },
      "sweep_account": {
        "service_code": "SWEEP",
        "unit": "per_month",
        "price": 2500
      },
      "api_setup": {
        "service_code": "API_SETUP",
        "unit": "one_time",
        "price": 25000
      },
      "api_transaction": {
        "service_code": "API_TXN",
        "unit": "per_transaction",
        "price": 1.75
      }
    },

    "credit_facilities": {
      "cash_credit": {
        "base_rate_percent": 8.0,
        "spread_percent_range": [1.75, 2.25],
        "total_rate_percent_range": [9.75, 10.25]
      },
      "trade_credit": {
        "spread_percent_range": [1.5, 2.0],
        "total_rate_percent_range": [9.5, 10.0]
      },
      "overdraft": {
        "total_rate_percent_range": [10.25, 11.25]
      }
    }
  },

  "programs_and_discounts": {
    "early_adopter_program": {
      "duration_months": 6,
      "discount_percent": 5,
      "applicable_to": "all_fees"
    },
    "referral_bonus": {
      "bonus_type": "account_credit",
      "amount": 10000
    }
  },

  "penalties_and_escalation": {
    "annual_escalation_percent": 3.5,
    "late_payment_interest_percent": 10.5
  },

  "contacts": {
    "pricing": {
      "email": "pricing@axisbank.com",
      "phone": "+91-22-6152-6152"
    },
    "corporate_banking": {
      "email": "corporate@axisbank.com",
      "phone": "+91-22-6152-6253"
    }
  }
}
