# In [None]:
# Install required libraries
# !pip install pdfplumber pandas

import pdfplumber
import re
import pandas as pd
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
import os

@dataclass
class CreditCardData:
    """Bank-agnostic record produced by every statement parser.

    Field order is part of the interface (the dataclass constructor accepts
    the values positionally in this order).
    """
    # --- issuer / card identification ---
    bank_name: str
    cardholder_name: str
    card_last_4: str

    # --- dates, stored as strings exactly as captured from the statement ---
    statement_date: str
    payment_due_date: str

    # --- amounts ---
    total_amount_due: float
    minimum_amount_due: float
    credit_limit: float
    available_credit: float

    # One dict per transaction. The HDFC and ICICI extractors in this file
    # emit the keys 'date', 'description' and 'amount'.
    transactions: List[Dict]

class CreditCardParser:
    """Main parser class with bank-specific adapters"""

    def __init__(self):
        self.bank_identifiers = {
            'HDFC': ['HDFC Bank', 'HDFC BANK', 'Paytm HDFC'],
            'ICICI': ['ICICI Bank', 'ICICI BANK', 'ICICI CARD'],
            'Axis': ['AXIS BANK', 'Axis Bank', 'Axis Cards', 'Flipkart Axis Bank'],
            'IDFC First': ['IDFC FIRST', 'IDFC FIRST BANK', 'IDFC Bank'],
            'Indian Bank': ['Indian Bank', 'INDIAN BANK', 'IBGCC']
        }

    def identify_bank(self, text: str) -> str:
        """Identify which bank issued the statement"""
        text_upper = text.upper()
        for bank, identifiers in self.bank_identifiers.items():
            for identifier in identifiers:
                if identifier.upper() in text_upper:
                    return bank
        return "UNKNOWN"

    def parse_statement(self, pdf_path: str) -> CreditCardData:
        """Open a statement PDF, detect the issuer and run the matching parser.

        Args:
            pdf_path: Path handed to ``pdfplumber.open``.

        Returns:
            Whatever the selected bank-specific parser returns.

        Raises:
            ValueError: If the extracted text is empty or whitespace only.
            Exception: Any other failure is reported on stdout and re-raised
                unchanged.
        """
        # Bank key -> adapter method name. The name is resolved with getattr
        # only after the bank is known, so adapters that are never selected
        # are never looked up.
        adapter_names = {
            "HDFC": "_parse_hdfc_fixed",
            "ICICI": "_parse_icici_improved",
            "Axis": "_parse_axis",
            "IDFC First": "_parse_idfc",
            "Indian Bank": "_parse_indian_bank",
        }

        try:
            with pdfplumber.open(pdf_path) as pdf:
                text = self._extract_text(pdf)

                if not text.strip():
                    raise ValueError("No text found in PDF")

                bank_name = self.identify_bank(text)
                print(f"🔍 Identified Bank: {bank_name}")

                # Anything unrecognised falls through to the generic parser.
                adapter = getattr(self, adapter_names.get(bank_name, "_parse_generic"))
                return adapter(text, pdf_path)

        except Exception as e:
            print(f"❌ Failed to process {pdf_path}: {str(e)}")
            raise

    def _extract_text(self, pdf) -> str:
        """Extract text from PDF with multiple strategies"""
        text = ""
        for page in pdf.pages:
            page_text = page.extract_text(layout=True) or page.extract_text() or ""
            text += page_text + "\n"
        return text

    # ===== FIXED HDFC BANK PARSER =====

    def _parse_hdfc_fixed(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a ``CreditCardData`` record from HDFC statement text.

        Args:
            text: Full text extracted from the statement.
            pdf_path: Accepted for a uniform parser signature; not used here.

        Returns:
            A record with ``bank_name`` set to 'HDFC' and every other field
            filled by the corresponding ``_extract_hdfc_*`` helper.
        """
        print("🔧 Using HDFC Bank parser (Fixed)...")

        # Keyword arguments are evaluated top to bottom, so the extractors run
        # (and log) in the same order as the fields are listed.
        return CreditCardData(
            bank_name='HDFC',
            cardholder_name=self._extract_hdfc_name_fixed(text),
            card_last_4=self._extract_hdfc_card_last_4_fixed(text),
            statement_date=self._extract_hdfc_statement_date_fixed(text),
            payment_due_date=self._extract_hdfc_due_date_fixed(text),
            total_amount_due=self._extract_hdfc_total_due_fixed(text),
            minimum_amount_due=self._extract_hdfc_min_due_fixed(text),
            credit_limit=self._extract_hdfc_credit_limit_fixed(text),
            available_credit=self._extract_hdfc_available_credit_fixed(text),
            transactions=self._extract_hdfc_transactions_fixed(text),
        )

    def _extract_hdfc_name_fixed(self, text: str) -> str:
        """Extract cardholder name from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC name...")

        # Pattern 1: Look for "Name : NAME" pattern (most reliable)
        match = re.search(r'Name\s*:\s*([A-Z][A-Za-z\s]+?)(?:\n|Email)', text, re.IGNORECASE)
        if match:
            name = match.group(1).strip()
            # Clean up - remove any trailing non-letter characters
            name = re.sub(r'[^A-Za-z\s]+$', '', name).strip()
            if name and len(name) > 2:
                print(f"✅ Found HDFC name: '{name}'")
                return name

        # Pattern 2: Look for name in the header section before "000Paytm"
        match = re.search(r'Name\s*:\s*([A-Z\s]+)\s*\n\s*000', text)
        if match:
            name = match.group(1).strip()
            name = re.sub(r'[^A-Za-z\s]+$', '', name).strip()
            if name and len(name) > 2:
                print(f"✅ Found HDFC name (pattern 2): '{name}'")
                return name

        # Pattern 3: Extract from transaction section header (before first transaction date)
        # Look for "Domestic Transactions" followed by headers, then the name line
        match = re.search(r'Domestic Transactions\s+Date\s+Transaction Description\s+Amount.*?\n\s*([A-Z][A-Z\s]+[A-Z])\s*\n\s*\d{2}/\d{2}/\d{4}', text, re.DOTALL)
        if match:
            name = match.group(1).strip()
            # Ensure it's a valid name (not a transaction description)
            if name and len(name.split()) >= 2 and not any(word in name for word in ['PAYTM', 'TRANSACTION', 'AMOUNT', 'DATE', 'NOIDA', 'DELHI']):
                print(f"✅ Found HDFC name from transactions: '{name}'")
                return name

        print("❌ Name not found in HDFC statement")
        return "Not Found"

    def _extract_hdfc_card_last_4_fixed(self, text: str) -> str:
        """Extract last 4 digits from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC card last 4...")

        # Pattern: Card No: 4695 25XX XXXX 3458
        pattern1 = r'Card No:\s*\d{4}\s*\d{2}XX\s*XXXX\s*(\d{4})'
        match = re.search(pattern1, text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found HDFC card last 4: {last_4}")
            return last_4

        # Alternative pattern
        pattern2 = r'\d{4}\s+\d{2}X+\s+X+\s+(\d{4})'
        match = re.search(pattern2, text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found HDFC card last 4: {last_4}")
            return last_4

        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_hdfc_statement_date_fixed(self, text: str) -> str:
        """Extract statement date from HDFC - FIXED"""
        print("🔍 Extracting HDFC statement date...")

        # Pattern: Statement Date:12/03/2023
        pattern = r'Statement Date:\s*(\d{2}/\d{2}/\d{4})'
        match = re.search(pattern, text)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC statement date: {date}")
            return date

        print("❌ Statement date not found")
        return "Not Found"

    def _extract_hdfc_due_date_fixed(self, text: str) -> str:
        """Extract payment due date from HDFC - FIXED"""
        print("🔍 Extracting HDFC payment due date...")

        # Pattern: Payment Due Date at the start of line followed by Total Dues
        pattern1 = r'Payment Due Date\s+Total Dues.*?\n(\d{2}/\d{2}/\d{4})'
        match = re.search(pattern1, text, re.DOTALL)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC payment due date: {date}")
            return date

        # Alternative: Look for date in the summary section
        pattern2 = r'(\d{2}/\d{2}/\d{4})\s+[\d,]+\.[\d]{2}\s+[\d,]+\.[\d]{2}'
        match = re.search(pattern2, text)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC payment due date (alt): {date}")
            return date

        print("❌ Payment due date not found")
        return "Not Found"

    def _extract_hdfc_total_due_fixed(self, text: str) -> float:
        """Extract total amount due from HDFC - FIXED"""
        print("🔍 Extracting HDFC total amount due...")

        # Look for the summary table with Payment Due Date, Total Dues, Minimum Amount Due
        pattern1 = r'(\d{2}/\d{2}/\d{4})\s+([\d,]+\.[\d]{2})\s+([\d,]+\.[\d]{2})'
        match = re.search(pattern1, text)
        if match:
            amount_str = match.group(2).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC total amount due: {amount}")
                return amount
            except ValueError:
                pass

        # Alternative: Look for "Total Dues" label
        pattern2 = r'Total Dues[^\d]+([\d,]+\.[\d]{2})'
        match = re.search(pattern2, text)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC total amount due (alt): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Total amount due not found")
        return 0.0

    def _extract_hdfc_min_due_fixed(self, text: str) -> float:
        """Extract minimum amount due from HDFC - FIXED"""
        print("🔍 Extracting HDFC minimum amount due...")

        # Look for the third column in the summary table
        pattern1 = r'(\d{2}/\d{2}/\d{4})\s+([\d,]+\.[\d]{2})\s+([\d,]+\.[\d]{2})'
        match = re.search(pattern1, text)
        if match:
            amount_str = match.group(3).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC minimum amount due: {amount}")
                return amount
            except ValueError:
                pass

        # Alternative: Look for "Minimum Amount Due" label
        pattern2 = r'Minimum Amount Due[^\d]+([\d,]+\.[\d]{2})'
        match = re.search(pattern2, text)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC minimum amount due (alt): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Minimum amount due not found")
        return 0.0

    def _extract_hdfc_credit_limit_fixed(self, text: str) -> float:
        """Extract credit limit from HDFC - FIXED"""
        print("🔍 Extracting HDFC credit limit...")

        # Pattern 1: Credit Limit in header row with values below
        # Credit Limit Available Credit Limit Available Cash Limit
        # 30,000 0.00
        match = re.search(r'Credit Limit\s+Available Credit Limit\s+Available Cash Limit\s*\n\s*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC credit limit: {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 2: Look for inline format "Credit Limit | amount"
        match = re.search(r'Credit Limit\s*\|\s*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC credit limit (pattern 2): {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 3: Look for "Credit Limit" followed by amount on same line
        match = re.search(r'Credit Limit[^\d\n]*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                # Verify it's a reasonable credit limit (between 1000 and 100 crores)
                if 1000 <= amount <= 1000000000:
                    print(f"✅ Found HDFC credit limit (pattern 3): {amount}")
                    return amount
            except ValueError:
                pass

        print("❌ Credit limit not found")
        return 0.0

    def _extract_hdfc_available_credit_fixed(self, text: str) -> float:
        """Extract available credit from HDFC - FIXED"""
        print("🔍 Extracting HDFC available credit...")

        # Pattern 1: Available Credit Limit in header row with values below
        # Credit Limit Available Credit Limit Available Cash Limit
        # 30,000 0.00 [third value]
        # Match: Credit Limit (no decimal) or with decimal, whitespace, then Available Credit (second number)
        match = re.search(r'Credit Limit\s+Available Credit Limit\s+Available Cash Limit\s*\n\s*([\d,]+)(?:\.[\d]+)?\s+([\d,]+\.[\d]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(2).replace(',', '')  # Get second number (Available Credit)
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit: {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 2: Simpler - just find two consecutive numbers after the header
        match = re.search(r'Credit Limit\s+Available Credit Limit.*?\n\s*[\d,]+(?:\.\d+)?\s+([\d,]+\.\d+)', text, re.IGNORECASE | re.DOTALL)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit (pattern 2): {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 3: Look for inline format "Available Credit Limit | amount"
        match = re.search(r'Available Credit Limit\s*\|\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit (pattern 3): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Available credit not found")
        return 0.0

    def _extract_hdfc_transactions_fixed(self, text: str) -> List[Dict]:
        """Extract transactions from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC transactions...")
        transactions = []

        # Find the Domestic Transactions section
        pattern = r'Domestic Transactions\s+Date\s+Transaction Description\s+Amount.*?(?=Reward Points|$)'
        domestic_match = re.search(pattern, text, re.DOTALL)

        if not domestic_match:
            print("❌ Could not find Domestic Transactions section")
            return transactions

        transaction_text = domestic_match.group(0)
        lines = transaction_text.split('\n')

        excluded_keywords = ['Domestic Transactions', 'Date', 'Transaction Description', 'Amount']

        for line in lines:
            line = line.strip()

            # Skip header lines and empty lines
            if not line or any(keyword in line for keyword in excluded_keywords):
                continue

            # Skip lines with cardholder name (they appear before transactions)
            if re.match(r'^[A-Z][A-Za-z\s]+[A-Z]$', line):
                continue

            # Pattern: DD/MM/YYYY Description Amount (with optional Cr)
            tx_pattern = r'(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,]+\.[\d]{2})(\s+Cr)?$'
            match = re.match(tx_pattern, line)

            if match:
                date = match.group(1)
                description = match.group(2).strip()
                amount_str = match.group(3).replace(',', '')
                is_credit = match.group(4) is not None

                try:
                    amount = float(amount_str)
                    # If it's a credit (Cr), make it negative
                    if is_credit:
                        amount = -amount

                    transactions.append({
                        'date': date,
                        'description': description,
                        'amount': amount
                    })
                    print(f"✅ Found transaction: {date} - {description[:30]}... - {amount}")
                except ValueError:
                    continue

        print(f"✅ Total HDFC transactions found: {len(transactions)}")
        return transactions

    # ===== ICICI BANK PARSER (FIXED) =====

    def _parse_icici_improved(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a ``CreditCardData`` record from ICICI statement text.

        Args:
            text: Full text extracted from the statement.
            pdf_path: Accepted for a uniform parser signature; not used here.

        Returns:
            A record with ``bank_name`` set to 'ICICI' and every other field
            filled by the corresponding ``_extract_icici_*`` helper.
        """
        print("🔧 Using ICICI Bank parser (Fixed)...")

        # Keyword arguments are evaluated top to bottom, so the extractors run
        # (and log) in the same order as the fields are listed.
        return CreditCardData(
            bank_name='ICICI',
            cardholder_name=self._extract_icici_name(text),
            card_last_4=self._extract_icici_card_last_4(text),
            statement_date=self._extract_icici_statement_date(text),
            payment_due_date=self._extract_icici_due_date(text),
            total_amount_due=self._extract_icici_total_due(text),
            minimum_amount_due=self._extract_icici_min_due(text),
            credit_limit=self._extract_icici_credit_limit(text),
            available_credit=self._extract_icici_available_credit(text),
            transactions=self._extract_icici_transactions(text),
        )

    def _extract_icici_name(self, text: str) -> str:
        """Extract cardholder name from ICICI statement"""
        print("🔍 Extracting ICICI name...")
        match = re.search(r'((?:MR|MS|MRS|DR)\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*(?:AT/PO|FLAT|HOUSE|[A-Z\s,/]+\n)', text)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found ICICI name: '{name}'")
            return name
        print("❌ Name not found in ICICI statement")
        return "Not Found"

    def _extract_icici_card_last_4(self, text: str) -> str:
        """Extract last 4 digits from ICICI statement"""
        print("🔍 Extracting ICICI card last 4...")
        match = re.search(r'\d{4}X+(\d{4})', text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found ICICI card last 4: {last_4}")
            return last_4
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_icici_statement_date(self, text: str) -> str:
        """Extract statement date from ICICI"""
        print("🔍 Extracting ICICI statement date...")
        match = re.search(r'STATEMENT DATE.*?([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text, re.IGNORECASE | re.DOTALL)
        if match:
            date = match.group(1)
            print(f"✅ Found ICICI statement date: {date}")
            return date
        match = re.search(r'Statement period\s*:\s*[A-Za-z]+\s+\d{1,2},\s+\d{4}\s+to\s+([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text)
        if match:
            date = match.group(1)
            print(f"✅ Found ICICI statement date (pattern 2): {date}")
            return date
        print("❌ Statement date not found")
        return "Not Found"

    def _extract_icici_due_date(self, text: str) -> str:
        """Extract payment due date from ICICI"""
        print("🔍 Extracting ICICI payment due date...")
        match = re.search(r'PAYMENT DUE DATE.*?([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text, re.IGNORECASE | re.DOTALL)
        if match:
            date = match.group(1).strip()
            print(f"✅ Found ICICI payment due date: {date}")
            return date
        # Alternative: Look for two dates near each other
        dates = re.findall(r'([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text[:1000])
        if len(dates) >= 2:
            date = dates[1]  # Second date is usually due date
            print(f"✅ Found ICICI payment due date (pattern 2): {date}")
            return date
        print("❌ Payment due date not found")
        return "Not Found"

    def _extract_icici_total_due(self, text: str) -> float:
        """Extract total amount due from ICICI"""
        print("🔍 Extracting ICICI total amount due...")
        match = re.search(r'Total Amount due\s+[`₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '').replace('`', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI total amount due: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Total amount due not found")
        return 0.0

    def _extract_icici_min_due(self, text: str) -> float:
        """Extract the minimum amount due from ICICI statement text.

        Four strategies are tried in order; the first value produced wins:
          1. explicit label patterns with the number directly after the label;
          2. the first positive number on the "Minimum Amount due" line or on
             one of the next three lines;
          3. the first positive number in a character window around the
             "Minimum Amount due" label (or, failing that, around the
             "Total Amount due" match) that does not exceed the total;
          4. the first positive number on any line containing "MINIMUM" or
             "MIN DUE".

        Returns:
            The amount as a float, or 0.0 when every strategy fails.
        """
        print("🔍 Extracting ICICI minimum amount due...")

        # 1) Try explicit patterns first (common formats).
        #    Unlike the later stages, a value of 0 is accepted here.
        patterns = [
            r'Minimum\s+Amount\s+due\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Minimum\s+Amount\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Minimum\s+Amount\s+Payable\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Amount\s+Due\s+\(Minimum\)\s*[`₹]?\s*([\d,]+\.?\d*)'
        ]
        for pat in patterns:
            match = re.search(pat, text, re.IGNORECASE)
            if match:
                amount_str = match.group(1).replace(',', '').replace('`', '')
                try:
                    amount = float(amount_str)
                    print(f"✅ Found ICICI minimum amount due (pattern): {amount}")
                    return amount
                except ValueError:
                    # The capture can be commas only, which float() rejects; try the next pattern.
                    continue

        # 2) Prefer exact label position: find 'Minimum Amount due' and then look on the same line or next few lines
        label_match = re.search(r'Minimum\s+Amount\s+due', text, re.IGNORECASE)
        if label_match:
            # Split into lines and find the line index containing the label
            lines = text.splitlines()
            # Walk the lines, tracking each line's [start, end) character span in `text`.
            # NOTE(review): the span assumes every line is followed by exactly one
            # terminator character (len(ln) + 1) — confirm for text with other line endings.
            char_index = 0
            found_line_idx = None
            for idx, ln in enumerate(lines):
                start = char_index
                end = char_index + len(ln) + 1
                if start <= label_match.start() < end:
                    found_line_idx = idx
                    break
                char_index = end

            if found_line_idx is not None:
                # Scan the label line and the next 3 lines (empty lines count towards
                # the 3) and return the first strictly positive number.
                for offset in range(0, 4):
                    li = found_line_idx + offset
                    if li >= len(lines):
                        break
                    line = lines[li].strip()
                    nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', line)
                    for n in nums:
                        try:
                            val = float(n.replace(',', '').replace('`', ''))
                            if val > 0:
                                print(f"✅ Found ICICI minimum amount due (near label): {val}")
                                return val
                        except ValueError:
                            continue

        # 3) Window fallback: collect the numbers in a window around an anchor and take the
        #    first positive one that is not larger than the total (when a total was parsed).
        total = None
        total_match = re.search(r'Total\s+Amount\s+due\s*[`₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if total_match:
            try:
                total = float(total_match.group(1).replace(',', '').replace('`', ''))
            except ValueError:
                total = None

        # Anchor on the end of the first 'Minimum Amount due' label, else on the end of the
        # 'Total Amount due' match.
        anchor_pos = None
        m2 = re.search(r'Minimum\s+Amount\s+due', text, re.IGNORECASE)
        if m2:
            anchor_pos = m2.end()
        elif total_match:
            anchor_pos = total_match.end()

        if anchor_pos is not None:
            # The window spans 200 characters before the anchor to 400 after it,
            # so numbers that precede the label are candidates too.
            window_start = max(0, anchor_pos - 200)
            window_end = min(len(text), anchor_pos + 400)
            window = text[window_start:window_end]
            nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', window)
            cleaned = []
            for n in nums:
                try:
                    cleaned.append(float(n.replace(',', '').replace('`', '')))
                except ValueError:
                    continue
            # Candidates are tested in the order they appear in the window.
            if cleaned:
                for v in cleaned:
                    if v > 0 and (total is None or v <= total):
                        print(f"✅ Found ICICI minimum amount due (window heuristic): {v}")
                        return v

        # 4) Last resort: search lines containing 'MINIMUM' or 'MIN DUE' (case-insensitive)
        #    and return the first strictly positive number on such a line.
        for line in text.splitlines():
            if 'MINIMUM' in line.upper() or 'MIN DUE' in line.upper():
                nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', line)
                for n in nums:
                    try:
                        val = float(n.replace(',', '').replace('`', ''))
                        if val > 0:
                            print(f"✅ Found ICICI minimum amount due (line heuristic): {val}")
                            return val
                    except ValueError:
                        continue

        print("❌ Minimum amount due not found")
        return 0.0

    def _extract_icici_credit_limit(self, text: str) -> float:
        """Extract credit limit from ICICI"""
        print("🔍 Extracting ICICI credit limit...")
        # Pattern for table format
        match = re.search(
            r"Credit Limit \(Including cash\)\s+Available Credit.*?[`₹]\s*([\d,]+\.?\d*)",
            text,
            re.IGNORECASE | re.DOTALL
        )
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI credit limit: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Credit limit not found")
        return 0.0

    def _extract_icici_available_credit(self, text: str) -> float:
        """Extract available credit from ICICI"""
        print("🔍 Extracting ICICI available credit...")
        # Look for the pattern with both credit limit and available credit
        match = re.search(
            r"Credit Limit \(Including cash\)\s+Available Credit \(Including cash\).*?[`₹]\s*[\d,]+\.?\d*\s+[`₹]\s*([\d,]+\.?\d*)",
            text,
            re.IGNORECASE | re.DOTALL
        )
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI available credit: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Available credit not found")
        return 0.0

    def _extract_icici_transactions(self, text: str) -> List[Dict]:
        """Extract transactions from ICICI statement - FIXED for multi-page"""
        print("🔍 Extracting ICICI transactions...")
        transactions = []

        # Pattern to match ICICI transactions
        # Format: DD/MM/YYYY SerialNumber Description Amount [CR]
        pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+'  # Date
            r'(\d+)\s+'  # Serial number
            r'(.+?)\s+'  # Description
            r'(?:IN\s+)?'  # Optional "IN"
            r'([\d,]+\.?\d*)\s*'  # Amount
            r'(CR)?'  # Optional CR
            r'(?:\s*$)',  # End of line
            re.MULTILINE
        )

        matches = pattern.findall(text)

        for match in matches:
            date, serial, description, amount_str, is_credit = match

            # Clean description
            description = description.strip()
            description = re.sub(r'\s+', ' ', description)

            # Skip headers
            if any(keyword in description.upper() for keyword in
                   ['TRANSACTION DETAILS', 'DATE', 'SERNO', 'AMOUNT', 'INTL', 'STATEMENT']):
                continue

            try:
                amount = float(amount_str.replace(',', ''))

                # Credits should be negative
                if is_credit:
                    amount = -amount

                transactions.append({
                    'date': date,
                    'description': description,
                    'amount': amount
                })
                print(f"✅ Found transaction: {date} - {description[:30]}... - {amount}")
            except ValueError:
                continue

        # Remove duplicates
        seen = set()
        unique_transactions = []
        for tx in transactions:
            key = (tx['date'], tx['description'], tx['amount'])
            if key not in seen:
                seen.add(key)
                unique_transactions.append(tx)

        print(f"✅ Total ICICI transactions found: {len(unique_transactions)}")
        return unique_transactions


    def _parse_axis(self, text: str, pdf_path: str) -> CreditCardData:
        """Axis Bank statement parser (earlier variant).

        Calls one ``_extract_axis_*`` helper per field and packs the results
        into a ``CreditCardData`` with ``bank_name`` set to 'Axis Bank'.
        ``pdf_path`` is not used.

        NOTE(review): a second ``_parse_axis`` is defined further down in this
        class body. Python keeps the last binding of a name in a class body,
        so this earlier definition is replaced by the later one and is not
        the one that runs. Several helpers referenced here (for example
        ``_extract_axis_statement_date``) are not visible in this part of the
        file; confirm they exist before reviving this variant.
        """
        print("🔧 Using Axis Bank parser (Fixed)...")

        # --- Extract fields ---
        cardholder_name = self._extract_axis_name(text)
        card_last_4 = self._extract_axis_card_last_4(text)
        statement_date = self._extract_axis_statement_date(text)
        payment_due_date = self._extract_axis_due_date(text)
        total_amount_due = self._extract_axis_total_due(text)
        minimum_amount_due = self._extract_axis_min_due(text)
        credit_limit = self._extract_axis_credit_limit(text)
        available_credit = self._extract_axis_available_credit(text)
        transactions = self._extract_axis_transactions(text)

        # Keys mirror the CreditCardData field names so the dict can be splatted.
        data = {
            'bank_name': 'Axis Bank',
            'cardholder_name': cardholder_name,
            'card_last_4': card_last_4,
            'statement_date': statement_date,
            'payment_due_date': payment_due_date,
            'total_amount_due': total_amount_due,
            'minimum_amount_due': minimum_amount_due,
            'credit_limit': credit_limit,
            'available_credit': available_credit,
            'transactions': transactions
        }
        return CreditCardData(**data)

    # ========== Axis Bank Field Extractors ==========

    # ===================== AXIS BANK PARSER (REPLACEMENT) =====================

    def _parse_axis(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a ``CreditCardData`` record from Axis Bank statement text.

        The payment summary helper supplies the statement date, due date and
        both amounts in one call; the limits helper supplies the credit limit
        and available credit. A falsy statement or due date is reported as
        "Not Found".

        Args:
            text: Full text extracted from the statement.
            pdf_path: Accepted for a uniform parser signature; not used here.
        """
        print("🔧 Using Axis Bank parser (Robust Enhanced)...")

        holder = self._extract_axis_name(text)
        last_four = self._extract_axis_card_last_4(text)
        summary = self._extract_axis_payment_summary(text)
        stmt_date, due_date, total_due, min_due = summary
        limit, available = self._extract_axis_limits(text)
        txns = self._extract_axis_transactions(text)

        fields = dict(
            bank_name='Axis Bank',
            cardholder_name=holder,
            card_last_4=last_four,
            statement_date=stmt_date if stmt_date else "Not Found",
            payment_due_date=due_date if due_date else "Not Found",
            total_amount_due=total_due,
            minimum_amount_due=min_due,
            credit_limit=limit,
            available_credit=available,
            transactions=txns,
        )
        return CreditCardData(**fields)

    # ---------------- Axis Extractors ----------------

    def _extract_axis_name(self, text: str) -> str:
        print("🔍 Extracting Axis cardholder name...")
        # Common pattern: uppercase name before address
        match = re.search(r'\n([A-Z][A-Z\s,.-]+)\nB/', text)
        if match:
            name = re.sub(r'\s+', ' ', match.group(1).strip())
            print(f"✅ Found Axis name: '{name}'")
            return name

        # Fallback: first uppercase line that looks like a name
        for line in text.splitlines():
            ln = line.strip()
            if ln.isupper() and len(ln.split()) >= 2 and len(ln) < 60:
                if not any(k in ln for k in ['AXIS', 'STATEMENT', 'PAYMENT', 'SUMMARY']):
                    print(f"✅ Found Axis name (fallback): '{ln}'")
                    return re.sub(r'\s+', ' ', ln)
        print("❌ Name not found")
        return "Not Found"

    def _extract_axis_card_last_4(self, text: str) -> str:
        print("🔍 Extracting Axis card last 4 digits...")
        match = re.search(r'(\d{6}\*{6}(\d{4}))|(\*{6}(\d{4}))', text)
        if match:
            for g in match.groups():
                if g and re.fullmatch(r'\d{4}', g):
                    print(f"✅ Found Axis card last 4: {g}")
                    return g
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_axis_payment_summary(self, text: str):
        """Extract statement date, due date, total/min due from Payment Summary block"""
        print("🔍 Extracting Axis payment summary...")
        section = text[text.upper().find('PAYMENT SUMMARY'):text.upper().find('AUTO-DEBIT') + 300]

        # Find all dates and Dr amounts
        dates = re.findall(r'(\d{2}/\d{2}/\d{4})', section)
        amounts = re.findall(r'([\d\s,]+\.\d{2})\s*Dr', section, re.IGNORECASE)

        statement_date = None
        payment_due_date = None
        total_due = 0.0
        min_due = 0.0

        if len(dates) >= 2:
            statement_date = dates[1]
        if len(dates) >= 3:
            payment_due_date = dates[2]

        if amounts:
            total_due = self._clean_amount_to_float(amounts[0])
            if len(amounts) > 1:
                min_due = self._clean_amount_to_float(amounts[1])
            else:
                min_due = total_due

        print(f"✅ Summary found: Statement={statement_date}, Due={payment_due_date}, Total={total_due}, Min={min_due}")
        return statement_date, payment_due_date, total_due, min_due

    def _extract_axis_limits(self, text: str):
        """Extract Credit Limit and Available Credit"""
        print("🔍 Extracting Axis credit limits...")
        # Example: 33467******7381 132,000.00 30,641.86 30,641.86
        match = re.search(r'\*{4,}\d{4}\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})', text)
        if match:
            credit_limit = self._clean_amount_to_float(match.group(1))
            available_credit = self._clean_amount_to_float(match.group(2))
            print(f"✅ Found limits: Credit={credit_limit}, Available={available_credit}")
            return credit_limit, available_credit
        print("❌ Credit limits not found")
        return 0.0, 0.0

    def _clean_amount_to_float(self, s: str) -> float:
        """Utility to clean currency strings"""
        if not s:
            return 0.0
        cleaned = re.sub(r'[^\d.\-]', '', s)
        try:
            return float(cleaned)
        except:
            return 0.0

    def _extract_axis_transactions(self, text: str) -> List[Dict]:
        """Enhanced Axis transaction extractor (multi-line + missing dates)"""
        print("🔍 Extracting Axis transactions (enhanced)...")
        transactions = []

        # Normalize text
        text = text.replace('\r', '').replace('\t', ' ')
        text = re.sub(r' {2,}', ' ', text)

        # Regex for transactions with date + Dr/Cr
        tx_pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d\s,]+\.\d{2})\s*(Dr|Cr)\b',
            re.MULTILINE | re.IGNORECASE
        )

        # 1️⃣ Match direct transactions
        for match in tx_pattern.finditer(text):
            date, desc, amt_str, drcr = match.groups()
            amount = self._clean_amount_to_float(amt_str)
            if drcr.lower() == 'cr':
                amount = -amount
            transactions.append({
                'date': date,
                'description': desc.strip(),
                'amount': amount
            })

        # 2️⃣ Handle continuation lines (GST, EMI, etc.)
        last_date = None
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            # detect date
            m_date = re.match(r'(\d{2}/\d{2}/\d{4})', line)
            if m_date:
                last_date = m_date.group(1)
                continue
            # detect Dr/Cr line without date
            m_amount = re.search(r'([\d\s,]+\.\d{2})\s*(Dr|Cr)\b', line, re.IGNORECASE)
            if m_amount and last_date:
                amt = self._clean_amount_to_float(m_amount.group(1))
                if m_amount.group(2).lower() == 'cr':
                    amt = -amt
                desc = re.sub(r'([\d\s,]+\.\d{2})\s*(Dr|Cr)\b', '', line).strip()
                if desc:
                    transactions.append({'date': last_date, 'description': desc, 'amount': amt})

        # 3️⃣ Remove duplicates
        seen = set()
        unique = []
        for tx in transactions:
            key = (tx['date'], tx['description'][:30], tx['amount'])
            if key not in seen:
                seen.add(key)
                unique.append(tx)

        print(f"✅ Total Axis transactions found: {len(unique)}")
        return unique

    def _parse_idfc(self, text: str, pdf_path: str) -> CreditCardData:
        """Parse an IDFC FIRST Bank statement by delegating to the IDFC extractors.

        Args:
            text: Full text extracted from the statement PDF.
            pdf_path: Path of the source PDF; not used by this parser.

        Returns:
            A ``CreditCardData`` record with ``bank_name`` set to
            ``'IDFC First'``.
        """
        print("🔧 Using IDFC FIRST Bank parser (Enhanced Dynamic)...")

        # The extractors print progress, so they are called in a fixed order.
        holder = self._extract_idfc_name(text)
        last_four = self._extract_idfc_card_last_4(text)
        # NOTE(review): the date extractor defined next to the other IDFC
        # helpers is named `_extract_idfc_dates_fixed`; confirm that
        # `_extract_idfc_dates` exists, otherwise this raises AttributeError.
        dates = self._extract_idfc_dates(text)
        dues = self._extract_idfc_dues(text)
        limits = self._extract_idfc_limits(text)
        entries = self._extract_idfc_transactions(text)

        stmt_date, due_date = dates
        total_due, min_due = dues
        # The cash limit is extracted but has no field in CreditCardData.
        limit, available, _cash_limit = limits

        fields = {
            'bank_name': 'IDFC First',
            'cardholder_name': holder,
            'card_last_4': last_four,
            'statement_date': stmt_date,
            'payment_due_date': due_date,
            'total_amount_due': total_due,
            'minimum_amount_due': min_due,
            'credit_limit': limit,
            'available_credit': available,
            'transactions': entries,
        }
        return CreditCardData(**fields)

    # ---------------- IDFC EXTRACTORS ----------------

    def _extract_idfc_name(self, text: str) -> str:
        print("🔍 Extracting IDFC cardholder name...")
        # Pattern 1: Look for name just above "Credit Card Statement"
        match = re.search(r'\n\s*([A-Z][A-Za-z\s]+)\n\s*Credit Card Statement', text)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found IDFC name: '{name}'")
            return name
        # Pattern 2: Fallback - Look for "Customer Name :" followed by the name
        match = re.search(r'Customer Name\s*:\s*([A-Z][A-Za-z\s]+)', text)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found IDFC name (fallback): '{name}'")
            return name
        print("❌ Cardholder name not found in IDFC statement")
        return "Not Found"


    def _extract_idfc_card_last_4(self, text: str) -> str:
        print("🔍 Extracting IDFC card last 4 digits...")
        match = re.search(r'\d{6}\*{6}(\d{4})', text)
        if match:
            last4 = match.group(1)
            print(f"✅ Found IDFC card last 4: {last4}")
            return last4
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_idfc_dates_fixed(self, text: str) -> tuple:
        print("🔍 Extracting IDFC statement and due dates...")
        # Pattern: "Statement Date\n24/08/2021 11/09/2021"
        match = re.search(r'Statement\s*Date[\s\n]+(\d{2}/\d{2}/\d{4})\s+(\d{2}/\d{2}/\d{4})', text)
        if match:
            s_date, d_date = match.groups()
            print(f"✅ Found statement date: {s_date}, due date: {d_date}")
            return s_date, d_date
        print("❌ Dates not found")
        return "Not Found", "Not Found"

    def _extract_idfc_dues(self, text: str) -> tuple:
        print("🔍 Extracting IDFC total and minimum amount due...")
        # Example:
        # Total Amount Due Minimum Amount Due
        # r 29,147.25 r 1,457.36
        match = re.search(
            r'Total Amount Due\s+Minimum Amount Due\s*\n\s*[r₹]\s*([\d,]+\.\d{2})\s*[r₹]\s*([\d,]+\.\d{2})',
            text, re.IGNORECASE)
        if match:
            try:
                total = float(match.group(1).replace(',', ''))
                minimum = float(match.group(2).replace(',', ''))
                print(f"✅ Found total due: {total}, minimum due: {minimum}")
                return total, minimum
            except ValueError:
                pass
        # Fallback: Look for labels individually
        total_match = re.search(r'Total Amount Due\s*[:]?\s*[r₹]\s*([\d,]+\.\d{2})', text, re.IGNORECASE)
        min_match = re.search(r'Minimum Amount Due\s*[:]?\s*[r₹]\s*([\d,]+\.\d{2})', text, re.IGNORECASE)
        total = float(total_match.group(1).replace(',', '')) if total_match else 0.0
        minimum = float(min_match.group(1).replace(',', '')) if min_match else 0.0
        if total > 0 or minimum > 0:
             print(f"✅ Found total due (fallback): {total}, minimum due (fallback): {minimum}")
             return total, minimum

        print("❌ Dues not found")
        return 0.0, 0.0

    def _extract_idfc_limits(self, text: str) -> tuple:
        print("🔍 Extracting IDFC credit, available and cash limits...")
        # Example:
        # Credit Limit Available Credit Limit
        # r 1,92,000 r 1,62,852.75
        # Cash Limit r 19,200
        credit_match = re.search(
            r'Credit Limit\s+Available Credit Limit\s*\n\s*[r₹]\s*([\d,]+\.?\d*)\s*[r₹]\s*([\d,]+\.?\d*)',
            text, re.IGNORECASE)
        cash_match = re.search(r'Cash Limit\s*\n\s*[r₹]\s*([\d,]+\.?\d*)', text, re.IGNORECASE)

        credit, available, cash = 0.0, 0.0, 0.0

        if credit_match:
            try:
                credit = float(credit_match.group(1).replace(',', ''))
                available = float(credit_match.group(2).replace(',', ''))
            except ValueError:
                pass

        # Pattern 2: Look for "Credit Limit" and "Available Credit" on separate lines, possibly with other text
        match2 = re.search(
            r'Credit Limit\s*[:]?\s*[r₹]\s*([\d,]+\.?\d*).*?\n.*?Available Credit Limit\s*[:]?\s*[r₹]\s*([\d,]+\.?\d*)',
            text, re.IGNORECASE | re.DOTALL
        )
        if match2 and credit == 0.0 and available == 0.0: # Only use if primary pattern failed
            try:
                credit = float(match2.group(1).replace(',', ''))
                available = float(match2.group(2).replace(',', ''))
                print(f"✅ Found limits (pattern 2) → Credit: {credit}, Available: {available}")
            except ValueError:
                pass


        if cash_match:
            try:
                cash = float(cash_match.group(1).replace(',', ''))
            except ValueError:
                pass

        # Fallback: Look for labels individually (already present, but ensure it's the last resort)
        if credit == 0.0:
            credit_match_fallback = re.search(r'Credit Limit\s*[:]?\s*[r₹]\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
            if credit_match_fallback:
                 try: credit = float(credit_match_fallback.group(1).replace(',', ''))
                 except ValueError: pass
        if available == 0.0:
            available_match_fallback = re.search(r'Available Credit Limit\s*[:]?\s*[r₹]\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
            if available_match_fallback:
                 try: available = float(available_match_fallback.group(1).replace(',', ''))
                 except ValueError: pass
        if cash == 0.0:
            cash_match_fallback = re.search(r'Cash Limit\s*[:]?\s*[r₹]\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
            if cash_match_fallback:
                 try: cash = float(cash_match_fallback.group(1).replace(',', ''))
                 except ValueError: pass


        print(f"✅ Found limits → Credit: {credit}, Available: {available}, Cash: {cash}")
        return credit, available, cash

    def _extract_idfc_transactions(self, text: str) -> List[Dict]:
        print("🔍 Extracting IDFC transactions...")
        transactions = []

        # Extract section after "YOUR TRANSACTIONS"
        tx_section = re.split(r'YOUR TRANSACTIONS', text, flags=re.IGNORECASE)
        tx_text = tx_section[1] if len(tx_section) > 1 else text

        tx_pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+([A-Za-z0-9\s,&.-]+?)\s+([\d,]+\.\d{2})\s*(CR)?',
            re.IGNORECASE
        )

        for match in tx_pattern.finditer(tx_text):
            date = match.group(1)
            desc = match.group(2).strip()
            amt = float(match.group(3).replace(',', ''))
            if match.group(4):
                amt = -amt
            transactions.append({
                'date': date,
                'description': desc,
                'amount': amt
            })
            print(f"✅ Found transaction: {date} - {desc[:30]}... - {amt}")

        print(f"✅ Total IDFC transactions found: {len(transactions)}")
        return transactions


    def _parse_indian_bank(self, text: str, pdf_path: str) -> CreditCardData:
        """Placeholder Indian Bank parser.

        No Indian Bank extraction exists yet, so ``text`` and ``pdf_path``
        are ignored and a record of default values labelled
        ``'Indian Bank'`` is returned.
        """
        print("🔧 Using Indian Bank parser...")
        placeholder_fields = self._get_default_data('Indian Bank')
        return CreditCardData(**placeholder_fields)


    def _parse_generic(self, text: str, pdf_path: str) -> CreditCardData:
        """Placeholder parser for statements from unrecognised banks.

        No generic extraction exists yet, so ``text`` and ``pdf_path`` are
        ignored and a record of default values labelled ``'UNKNOWN'`` is
        returned.
        """
        print("🔧 Using generic parser...")
        placeholder_fields = self._get_default_data('UNKNOWN')
        return CreditCardData(**placeholder_fields)

    def _get_default_data(self, bank_name: str) -> Dict:
        """Get default data structure for banks without specific parsers"""
        return {
            'bank_name': bank_name,
            'cardholder_name': "Not Found",
            'card_last_4': "Not Found",
            'statement_date': "Not Found",
            'payment_due_date': "Not Found",
            'total_amount_due': 0.0,
            'minimum_amount_due': 0.0,
            'credit_limit': 0.0,
            'available_credit': 0.0,
            'transactions': []
        }

class StatementAnalyzer:
    """Console presentation of a parsed statement."""

    @staticmethod
    def display_summary(data: CreditCardData):
        """Print the statement header fields and, if any, a transaction table.

        Negative transaction amounts are shown with a leading "- ₹" and are
        totalled separately from the positive (debit) amounts.
        """
        banner = "=" * 60
        print("\n" + banner)
        print("💳 CREDIT CARD STATEMENT SUMMARY")
        print(banner)
        print(f"🏦 Bank: {data.bank_name}")
        print(f"👤 Cardholder: {data.cardholder_name}")
        print(f"🔢 Card Number: **** **** **** {data.card_last_4}")
        print(f"📅 Statement Date: {data.statement_date}")
        print(f"⏰ Payment Due Date: {data.payment_due_date}")
        print(f"💰 Total Amount Due: ₹{data.total_amount_due:,.2f}")
        print(f"💸 Minimum Amount Due: ₹{data.minimum_amount_due:,.2f}")
        print(f"🎯 Credit Limit: ₹{data.credit_limit:,.2f}")
        print(f"💳 Available Credit: ₹{data.available_credit:,.2f}")
        print(f"📊 Transactions Count: {len(data.transactions)}")
        print(banner)

        # Nothing more to show for a statement without transactions.
        if not data.transactions:
            return

        divider = "-" * 80
        print("\n📋 TRANSACTIONS:")
        print(divider)
        print(f"{'Date':<12} {'Description':<45} {'Amount':>15}")
        print(divider)
        for tx in data.transactions:
            # Keep the description column to 45 characters including "...".
            desc = tx['description']
            if len(desc) > 42:
                desc = desc[:42] + "..."
            if tx['amount'] < 0:
                amount_prefix = "- ₹"
            else:
                amount_prefix = "₹"
            amount_display = abs(tx['amount'])
            print(f"{tx['date']:<12} {desc:<45} {amount_prefix}{amount_display:>12,.2f}")

        print(divider)
        total_debits = sum([tx['amount'] for tx in data.transactions if tx['amount'] > 0])
        total_credits = sum([abs(tx['amount']) for tx in data.transactions if tx['amount'] < 0])
        print(f"{'Total Debits:':<57} ₹{total_debits:>12,.2f}")
        print(f"{'Total Credits:':<57} - ₹{total_credits:>12,.2f}")

# ===== MAIN EXECUTION =====
def main():
    """Upload statement PDFs in Colab, parse each one and print its summary.

    Each uploaded file is written to the working directory so it can be
    opened by path, parsed, displayed and then deleted. A failure on one
    file is reported with its traceback and does not stop the remaining
    files from being processed.
    """
    import traceback

    parser = CreditCardParser()
    analyzer = StatementAnalyzer()

    # Upload PDF files (google.colab is only importable inside Colab).
    from google.colab import files

    print("🚀 MULTI-BANK CREDIT CARD STATEMENT PARSER (FIXED)")
    print("Supported Banks: HDFC, ICICI, Axis, IDFC First, Indian Bank")
    print("="*60)
    print("📤 Upload your credit card statement PDFs...")

    uploaded = files.upload()

    all_statements = []

    for filename, content in uploaded.items():
        print(f"\n🔍 Processing: {filename}")

        with open(filename, 'wb') as f:
            f.write(content)

        try:
            statement_data = parser.parse_statement(filename)
            all_statements.append(statement_data)
            analyzer.display_summary(statement_data)

        except Exception as e:
            print(f"❌ Failed to process {filename}: {str(e)}")
            traceback.print_exc()

        finally:
            # Remove the temporary copy whether or not parsing succeeded, so
            # failed statements are not left behind on disk.
            if os.path.exists(filename):
                os.remove(filename)

    print(f"\n🎯 Processing complete! Successfully parsed {len(all_statements)} statement(s).")

In [27]:
# Install required libraries
!pip install pdfplumber pandas

import pdfplumber
import re
import pandas as pd
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
import os

@dataclass
class CreditCardData:
    """Standardized data structure for all credit card statements.

    Every bank-specific parser returns one of these. The parsers in this
    file use the string ``"Not Found"`` for text fields and ``0.0`` for
    amounts they could not extract.
    """
    # Label of the issuing bank as set by the parser (e.g. 'HDFC').
    bank_name: str
    # Cardholder name as printed on the statement.
    cardholder_name: str
    # Last four digits of the masked card number, kept as text.
    card_last_4: str
    # Statement date string as matched in the PDF text (e.g. '12/03/2023').
    statement_date: str
    # Payment due date string, same format as statement_date.
    payment_due_date: str
    # Total amount due for the statement.
    total_amount_due: float
    # Minimum amount due for the statement.
    minimum_amount_due: float
    # Total credit limit on the card.
    credit_limit: float
    # Credit limit still available.
    available_credit: float
    # One dict per transaction; presumably 'date', 'description' and
    # 'amount' keys, with credits stored as negative amounts.
    transactions: List[Dict]

class CreditCardParser:
    """Main parser class with bank-specific adapters"""

    def __init__(self):
        self.bank_identifiers = {
            'HDFC': ['HDFC Bank', 'HDFC BANK', 'Paytm HDFC'],
            'ICICI': ['ICICI Bank', 'ICICI BANK', 'ICICI CARD'],
            'Axis': ['AXIS BANK', 'Axis Bank', 'Axis Cards', 'Flipkart Axis Bank'],
            'IDFC First': ['IDFC FIRST', 'IDFC FIRST BANK', 'IDFC Bank'],
            'Indian Bank': ['Indian Bank', 'INDIAN BANK', 'IBGCC']
        }

    def identify_bank(self, text: str) -> str:
        """Identify which bank issued the statement"""
        text_upper = text.upper()
        for bank, identifiers in self.bank_identifiers.items():
            for identifier in identifiers:
                if identifier.upper() in text_upper:
                    return bank
        return "UNKNOWN"

    def parse_statement(self, pdf_path: str) -> CreditCardData:
        """Open a statement PDF, detect the bank and run the matching parser.

        Args:
            pdf_path: Path of the PDF to parse.

        Returns:
            The ``CreditCardData`` produced by the bank-specific parser, or
            by the generic parser when the bank is not recognised.

        Raises:
            ValueError: If no text could be extracted from the PDF.
            Exception: Any other error is logged and re-raised unchanged.
        """
        # Bank label -> name of the method that handles that bank's layout.
        parser_names = {
            "HDFC": "_parse_hdfc_fixed",
            "ICICI": "_parse_icici_improved",
            "Axis": "_parse_axis",
            "IDFC First": "_parse_idfc",
            "Indian Bank": "_parse_indian_bank",
        }
        try:
            with pdfplumber.open(pdf_path) as pdf:
                text = self._extract_text(pdf)

                if not text.strip():
                    raise ValueError("No text found in PDF")

                bank_name = self.identify_bank(text)
                print(f"🔍 Identified Bank: {bank_name}")

                # Resolve by name so only the selected parser is looked up.
                method_name = parser_names.get(bank_name, "_parse_generic")
                parse = getattr(self, method_name)
                return parse(text, pdf_path)

        except Exception as e:
            print(f"❌ Failed to process {pdf_path}: {str(e)}")
            raise

    def _extract_text(self, pdf) -> str:
        """Extract text from PDF with multiple strategies"""
        text = ""
        for page in pdf.pages:
            page_text = page.extract_text(layout=True) or page.extract_text() or ""
            text += page_text + "\n"
        return text

    # ===== FIXED HDFC BANK PARSER =====

    def _parse_hdfc_fixed(self, text: str, pdf_path: str) -> CreditCardData:
        """Parse an HDFC Bank statement by delegating to the HDFC extractors.

        Args:
            text: Full text extracted from the statement PDF.
            pdf_path: Path of the source PDF; not used by this parser.

        Returns:
            A ``CreditCardData`` record with ``bank_name`` set to ``'HDFC'``.
        """
        print("🔧 Using HDFC Bank parser (Fixed)...")

        # The extractors print progress, so they are called in a fixed order.
        holder = self._extract_hdfc_name_fixed(text)
        last_four = self._extract_hdfc_card_last_4_fixed(text)
        stmt_date = self._extract_hdfc_statement_date_fixed(text)
        due_date = self._extract_hdfc_due_date_fixed(text)
        total_due = self._extract_hdfc_total_due_fixed(text)
        min_due = self._extract_hdfc_min_due_fixed(text)
        limit = self._extract_hdfc_credit_limit_fixed(text)
        available = self._extract_hdfc_available_credit_fixed(text)
        entries = self._extract_hdfc_transactions_fixed(text)

        return CreditCardData(
            bank_name='HDFC',
            cardholder_name=holder,
            card_last_4=last_four,
            statement_date=stmt_date,
            payment_due_date=due_date,
            total_amount_due=total_due,
            minimum_amount_due=min_due,
            credit_limit=limit,
            available_credit=available,
            transactions=entries,
        )

    def _extract_hdfc_name_fixed(self, text: str) -> str:
        """Extract cardholder name from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC name...")

        # Pattern 1: Look for "Name : NAME" pattern (most reliable)
        match = re.search(r'Name\s*:\s*([A-Z][A-Za-z\s]+?)(?:\n|Email)', text, re.IGNORECASE)
        if match:
            name = match.group(1).strip()
            # Clean up - remove any trailing non-letter characters
            name = re.sub(r'[^A-Za-z\s]+$', '', name).strip()
            if name and len(name) > 2:
                print(f"✅ Found HDFC name: '{name}'")
                return name

        # Pattern 2: Look for name in the header section before "000Paytm"
        match = re.search(r'Name\s*:\s*([A-Z\s]+)\s*\n\s*000', text)
        if match:
            name = match.group(1).strip()
            name = re.sub(r'[^A-Za-z\s]+$', '', name).strip()
            if name and len(name) > 2:
                print(f"✅ Found HDFC name (pattern 2): '{name}'")
                return name

        # Pattern 3: Extract from transaction section header (before first transaction date)
        # Look for "Domestic Transactions" followed by headers, then the name line
        match = re.search(r'Domestic Transactions\s+Date\s+Transaction Description\s+Amount.*?\n\s*([A-Z][A-Z\s]+[A-Z])\s*\n\s*\d{2}/\d{2}/\d{4}', text, re.DOTALL)
        if match:
            name = match.group(1).strip()
            # Ensure it's a valid name (not a transaction description)
            if name and len(name.split()) >= 2 and not any(word in name for word in ['PAYTM', 'TRANSACTION', 'AMOUNT', 'DATE', 'NOIDA', 'DELHI']):
                print(f"✅ Found HDFC name from transactions: '{name}'")
                return name

        print("❌ Name not found in HDFC statement")
        return "Not Found"

    def _extract_hdfc_card_last_4_fixed(self, text: str) -> str:
        """Extract last 4 digits from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC card last 4...")

        # Pattern: Card No: 4695 25XX XXXX 3458
        pattern1 = r'Card No:\s*\d{4}\s*\d{2}XX\s*XXXX\s*(\d{4})'
        match = re.search(pattern1, text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found HDFC card last 4: {last_4}")
            return last_4

        # Alternative pattern
        pattern2 = r'\d{4}\s+\d{2}X+\s+X+\s+(\d{4})'
        match = re.search(pattern2, text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found HDFC card last 4: {last_4}")
            return last_4

        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_hdfc_statement_date_fixed(self, text: str) -> str:
        """Extract statement date from HDFC - FIXED"""
        print("🔍 Extracting HDFC statement date...")

        # Pattern: Statement Date:12/03/2023
        pattern = r'Statement Date:\s*(\d{2}/\d{2}/\d{4})'
        match = re.search(pattern, text)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC statement date: {date}")
            return date

        print("❌ Statement date not found")
        return "Not Found"

    def _extract_hdfc_due_date_fixed(self, text: str) -> str:
        """Extract payment due date from HDFC - FIXED"""
        print("🔍 Extracting HDFC payment due date...")

        # Pattern: Payment Due Date at the start of line followed by Total Dues
        pattern1 = r'Payment Due Date\s+Total Dues.*?\n(\d{2}/\d{2}/\d{4})'
        match = re.search(pattern1, text, re.DOTALL)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC payment due date: {date}")
            return date

        # Alternative: Look for date in the summary section
        pattern2 = r'(\d{2}/\d{2}/\d{4})\s+[\d,]+\.[\d]{2}\s+[\d,]+\.[\d]{2}'
        match = re.search(pattern2, text)
        if match:
            date = match.group(1)
            print(f"✅ Found HDFC payment due date (alt): {date}")
            return date

        print("❌ Payment due date not found")
        return "Not Found"

    def _extract_hdfc_total_due_fixed(self, text: str) -> float:
        """Extract total amount due from HDFC - FIXED"""
        print("🔍 Extracting HDFC total amount due...")

        # Look for the summary table with Payment Due Date, Total Dues, Minimum Amount Due
        pattern1 = r'(\d{2}/\d{2}/\d{4})\s+([\d,]+\.[\d]{2})\s+([\d,]+\.[\d]{2})'
        match = re.search(pattern1, text)
        if match:
            amount_str = match.group(2).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC total amount due: {amount}")
                return amount
            except ValueError:
                pass

        # Alternative: Look for "Total Dues" label
        pattern2 = r'Total Dues[^\d]+([\d,]+\.[\d]{2})'
        match = re.search(pattern2, text)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC total amount due (alt): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Total amount due not found")
        return 0.0

    def _extract_hdfc_min_due_fixed(self, text: str) -> float:
        """Extract minimum amount due from HDFC - FIXED"""
        print("🔍 Extracting HDFC minimum amount due...")

        # Look for the third column in the summary table
        pattern1 = r'(\d{2}/\d{2}/\d{4})\s+([\d,]+\.[\d]{2})\s+([\d,]+\.[\d]{2})'
        match = re.search(pattern1, text)
        if match:
            amount_str = match.group(3).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC minimum amount due: {amount}")
                return amount
            except ValueError:
                pass

        # Alternative: Look for "Minimum Amount Due" label
        pattern2 = r'Minimum Amount Due[^\d]+([\d,]+\.[\d]{2})'
        match = re.search(pattern2, text)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC minimum amount due (alt): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Minimum amount due not found")
        return 0.0

    def _extract_hdfc_credit_limit_fixed(self, text: str) -> float:
        """Extract credit limit from HDFC - FIXED"""
        print("🔍 Extracting HDFC credit limit...")

        # Pattern 1: Credit Limit in header row with values below
        # Credit Limit Available Credit Limit Available Cash Limit
        # 30,000 0.00
        match = re.search(r'Credit Limit\s+Available Credit Limit\s+Available Cash Limit\s*\n\s*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC credit limit: {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 2: Look for inline format "Credit Limit | amount"
        match = re.search(r'Credit Limit\s*\|\s*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC credit limit (pattern 2): {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 3: Look for "Credit Limit" followed by amount on same line
        match = re.search(r'Credit Limit[^\d\n]*([\d,]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                # Verify it's a reasonable credit limit (between 1000 and 100 crores)
                if 1000 <= amount <= 1000000000:
                    print(f"✅ Found HDFC credit limit (pattern 3): {amount}")
                    return amount
            except ValueError:
                pass

        print("❌ Credit limit not found")
        return 0.0

    def _extract_hdfc_available_credit_fixed(self, text: str) -> float:
        """Extract available credit from HDFC - FIXED"""
        print("🔍 Extracting HDFC available credit...")

        # Pattern 1: Available Credit Limit in header row with values below
        # Credit Limit Available Credit Limit Available Cash Limit
        # 30,000 0.00 [third value]
        # Match: Credit Limit (no decimal) or with decimal, whitespace, then Available Credit (second number)
        match = re.search(r'Credit Limit\s+Available Credit Limit\s+Available Cash Limit\s*\n\s*([\d,]+)(?:\.[\d]+)?\s+([\d,]+\.[\d]+)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(2).replace(',', '')  # Get second number (Available Credit)
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit: {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 2: Simpler - just find two consecutive numbers after the header
        match = re.search(r'Credit Limit\s+Available Credit Limit.*?\n\s*[\d,]+(?:\.\d+)?\s+([\d,]+\.\d+)', text, re.IGNORECASE | re.DOTALL)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit (pattern 2): {amount}")
                return amount
            except ValueError:
                pass

        # Pattern 3: Look for inline format "Available Credit Limit | amount"
        match = re.search(r'Available Credit Limit\s*\|\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found HDFC available credit (pattern 3): {amount}")
                return amount
            except ValueError:
                pass

        print("❌ Available credit not found")
        return 0.0

    def _extract_hdfc_transactions_fixed(self, text: str) -> List[Dict]:
        """Extract transactions from HDFC statement - FIXED"""
        print("🔍 Extracting HDFC transactions...")
        transactions = []

        # Find the Domestic Transactions section
        pattern = r'Domestic Transactions\s+Date\s+Transaction Description\s+Amount.*?(?=Reward Points|$)'
        domestic_match = re.search(pattern, text, re.DOTALL)

        if not domestic_match:
            print("❌ Could not find Domestic Transactions section")
            return transactions

        transaction_text = domestic_match.group(0)
        lines = transaction_text.split('\n')

        excluded_keywords = ['Domestic Transactions', 'Date', 'Transaction Description', 'Amount']

        for line in lines:
            line = line.strip()

            # Skip header lines and empty lines
            if not line or any(keyword in line for keyword in excluded_keywords):
                continue

            # Skip lines with cardholder name (they appear before transactions)
            if re.match(r'^[A-Z][A-Za-z\s]+[A-Z]$', line):
                continue

            # Pattern: DD/MM/YYYY Description Amount (with optional Cr)
            tx_pattern = r'(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,]+\.[\d]{2})(\s+Cr)?$'
            match = re.match(tx_pattern, line)

            if match:
                date = match.group(1)
                description = match.group(2).strip()
                amount_str = match.group(3).replace(',', '')
                is_credit = match.group(4) is not None

                try:
                    amount = float(amount_str)
                    # If it's a credit (Cr), make it negative
                    if is_credit:
                        amount = -amount

                    transactions.append({
                        'date': date,
                        'description': description,
                        'amount': amount
                    })
                    print(f"✅ Found transaction: {date} - {description[:30]}... - {amount}")
                except ValueError:
                    continue

        print(f"✅ Total HDFC transactions found: {len(transactions)}")
        return transactions

    # ===== ICICI BANK PARSER (FIXED) =====

    def _parse_icici_improved(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a CreditCardData record from ICICI statement text.

        Each field is delegated to its ``_extract_icici_*`` helper, called in
        the order the fields are listed below. ``pdf_path`` is accepted for
        signature parity with the other bank parsers and is not used here.

        Args:
            text: Extracted statement text.
            pdf_path: Path of the source PDF (unused).

        Returns:
            A CreditCardData instance with ``bank_name`` set to 'ICICI'.
        """
        print("🔧 Using ICICI Bank parser (Fixed)...")

        # Keyword arguments are evaluated top to bottom, which keeps the
        # extractor call order (and therefore the log order) unchanged.
        return CreditCardData(
            bank_name='ICICI',
            cardholder_name=self._extract_icici_name(text),
            card_last_4=self._extract_icici_card_last_4(text),
            statement_date=self._extract_icici_statement_date(text),
            payment_due_date=self._extract_icici_due_date(text),
            total_amount_due=self._extract_icici_total_due(text),
            minimum_amount_due=self._extract_icici_min_due(text),
            credit_limit=self._extract_icici_credit_limit(text),
            available_credit=self._extract_icici_available_credit(text),
            transactions=self._extract_icici_transactions(text),
        )

    def _extract_icici_name(self, text: str) -> str:
        """Extract cardholder name from ICICI statement"""
        print("🔍 Extracting ICICI name...")
        match = re.search(r'((?:MR|MS|MRS|DR)\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*(?:AT/PO|FLAT|HOUSE|[A-Z\s,/]+\n)', text)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found ICICI name: '{name}'")
            return name
        print("❌ Name not found in ICICI statement")
        return "Not Found"

    def _extract_icici_card_last_4(self, text: str) -> str:
        """Extract last 4 digits from ICICI statement"""
        print("🔍 Extracting ICICI card last 4...")
        match = re.search(r'\d{4}X+(\d{4})', text)
        if match:
            last_4 = match.group(1)
            print(f"✅ Found ICICI card last 4: {last_4}")
            return last_4
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_icici_statement_date(self, text: str) -> str:
        """Extract statement date from ICICI"""
        print("🔍 Extracting ICICI statement date...")
        match = re.search(r'STATEMENT DATE.*?([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text, re.IGNORECASE | re.DOTALL)
        if match:
            date = match.group(1)
            print(f"✅ Found ICICI statement date: {date}")
            return date
        match = re.search(r'Statement period\s*:\s*[A-Za-z]+\s+\d{1,2},\s+\d{4}\s+to\s+([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text)
        if match:
            date = match.group(1)
            print(f"✅ Found ICICI statement date (pattern 2): {date}")
            return date
        print("❌ Statement date not found")
        return "Not Found"

    def _extract_icici_due_date(self, text: str) -> str:
        """Extract payment due date from ICICI"""
        print("🔍 Extracting ICICI payment due date...")
        match = re.search(r'PAYMENT DUE DATE.*?([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text, re.IGNORECASE | re.DOTALL)
        if match:
            date = match.group(1).strip()
            print(f"✅ Found ICICI payment due date: {date}")
            return date
        # Alternative: Look for two dates near each other
        dates = re.findall(r'([A-Z][a-z]+\s+\d{1,2},\s+\d{4})', text[:1000])
        if len(dates) >= 2:
            date = dates[1]  # Second date is usually due date
            print(f"✅ Found ICICI payment due date (pattern 2): {date}")
            return date
        print("❌ Payment due date not found")
        return "Not Found"

    def _extract_icici_total_due(self, text: str) -> float:
        """Extract total amount due from ICICI"""
        print("🔍 Extracting ICICI total amount due...")
        match = re.search(r'Total Amount due\s+[`₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if match:
            amount_str = match.group(1).replace(',', '').replace('`', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI total amount due: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Total amount due not found")
        return 0.0

    def _extract_icici_min_due(self, text: str) -> float:
        """Extract minimum amount due from ICICI"""
        print("🔍 Extracting ICICI minimum amount due...")

        # 1) Try explicit patterns first (common formats)
        patterns = [
            r'Minimum\s+Amount\s+due\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Minimum\s+Amount\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Minimum\s+Amount\s+Payable\s*[:\-]?\s*[`₹]?\s*([\d,]+\.?\d*)',
            r'Amount\s+Due\s+\(Minimum\)\s*[`₹]?\s*([\d,]+\.?\d*)'
        ]
        for pat in patterns:
            match = re.search(pat, text, re.IGNORECASE)
            if match:
                amount_str = match.group(1).replace(',', '').replace('`', '')
                try:
                    amount = float(amount_str)
                    print(f"✅ Found ICICI minimum amount due (pattern): {amount}")
                    return amount
                except ValueError:
                    continue

        # 2) Prefer exact label position: find 'Minimum Amount due' and then look on the same line or next few lines
        label_match = re.search(r'Minimum\s+Amount\s+due', text, re.IGNORECASE)
        if label_match:
            # Split into lines and find the line index containing the label
            lines = text.splitlines()
            # compute character position to line index mapping
            char_index = 0
            found_line_idx = None
            for idx, ln in enumerate(lines):
                start = char_index
                end = char_index + len(ln) + 1
                if start <= label_match.start() < end:
                    found_line_idx = idx
                    break
                char_index = end

            if found_line_idx is not None:
                # scan same line and up to next 3 non-empty lines for the first numeric value
                for offset in range(0, 4):
                    li = found_line_idx + offset
                    if li >= len(lines):
                        break
                    line = lines[li].strip()
                    nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', line)
                    for n in nums:
                        try:
                            val = float(n.replace(',', '').replace('`', ''))
                            if val > 0:
                                print(f"✅ Found ICICI minimum amount due (near label): {val}")
                                return val
                        except ValueError:
                            continue

        # 3) Window fallback around 'Total Amount due': pick the first reasonable numeric after the "Minimum" label if present,
        #    otherwise pick the first positive number in the window that is clearly not the total (i.e., smaller than total if total exists)
        total = None
        total_match = re.search(r'Total\s+Amount\s+due\s*[`₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if total_match:
            try:
                total = float(total_match.group(1).replace(',', '').replace('`', ''))
            except ValueError:
                total = None

        # find the first 'Minimum' occurrence line index, else use the first 'Total Amount due' match position
        anchor_pos = None
        m2 = re.search(r'Minimum\s+Amount\s+due', text, re.IGNORECASE)
        if m2:
            anchor_pos = m2.end()
        elif total_match:
            anchor_pos = total_match.end()

        if anchor_pos is not None:
            window_start = max(0, anchor_pos - 200)
            window_end = min(len(text), anchor_pos + 400)
            window = text[window_start:window_end]
            nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', window)
            cleaned = []
            for n in nums:
                try:
                    cleaned.append(float(n.replace(',', '').replace('`', '')))
                except ValueError:
                    continue
            # Prefer the first positive number after the label in the window
            if cleaned:
                for v in cleaned:
                    if v > 0 and (total is None or v <= total):
                        print(f"✅ Found ICICI minimum amount due (window heuristic): {v}")
                        return v

        # 4) Last resort: search lines containing 'MINIMUM' and parse numbers there
        for line in text.splitlines():
            if 'MINIMUM' in line.upper() or 'MIN DUE' in line.upper():
                nums = re.findall(r'[`₹]?\s*([\d,]+\.?\d*)', line)
                for n in nums:
                    try:
                        val = float(n.replace(',', '').replace('`', ''))
                        if val > 0:
                            print(f"✅ Found ICICI minimum amount due (line heuristic): {val}")
                            return val
                    except ValueError:
                        continue

        print("❌ Minimum amount due not found")
        return 0.0

    def _extract_icici_credit_limit(self, text: str) -> float:
        """Extract credit limit from ICICI"""
        print("🔍 Extracting ICICI credit limit...")
        # Pattern for table format
        match = re.search(
            r"Credit Limit \(Including cash\)\s+Available Credit.*?[`₹]\s*([\d,]+\.?\d*)",
            text,
            re.IGNORECASE | re.DOTALL
        )
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI credit limit: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Credit limit not found")
        return 0.0

    def _extract_icici_available_credit(self, text: str) -> float:
        """Extract available credit from ICICI"""
        print("🔍 Extracting ICICI available credit...")
        # Look for the pattern with both credit limit and available credit
        match = re.search(
            r"Credit Limit \(Including cash\)\s+Available Credit \(Including cash\).*?[`₹]\s*[\d,]+\.?\d*\s+[`₹]\s*([\d,]+\.?\d*)",
            text,
            re.IGNORECASE | re.DOTALL
        )
        if match:
            amount_str = match.group(1).replace(',', '')
            try:
                amount = float(amount_str)
                print(f"✅ Found ICICI available credit: {amount}")
                return amount
            except ValueError:
                pass
        print("❌ Available credit not found")
        return 0.0

    def _extract_icici_transactions(self, text: str) -> List[Dict]:
        """Extract transactions from ICICI statement - FIXED for multi-page"""
        print("🔍 Extracting ICICI transactions...")
        transactions = []

        # Pattern to match ICICI transactions
        # Format: DD/MM/YYYY SerialNumber Description Amount [CR]
        pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+'  # Date
            r'(\d+)\s+'  # Serial number
            r'(.+?)\s+'  # Description
            r'(?:IN\s+)?'  # Optional "IN"
            r'([\d,]+\.?\d*)\s*'  # Amount
            r'(CR)?'  # Optional CR
            r'(?:\s*$)',  # End of line
            re.MULTILINE
        )

        matches = pattern.findall(text)

        for match in matches:
            date, serial, description, amount_str, is_credit = match

            # Clean description
            description = description.strip()
            description = re.sub(r'\s+', ' ', description)

            # Skip headers
            if any(keyword in description.upper() for keyword in
                   ['TRANSACTION DETAILS', 'DATE', 'SERNO', 'AMOUNT', 'INTL', 'STATEMENT']):
                continue

            try:
                amount = float(amount_str.replace(',', ''))

                # Credits should be negative
                if is_credit:
                    amount = -amount

                transactions.append({
                    'date': date,
                    'description': description,
                    'amount': amount
                })
                print(f"✅ Found transaction: {date} - {description[:30]}... - {amount}")
            except ValueError:
                continue

        # Remove duplicates
        seen = set()
        unique_transactions = []
        for tx in transactions:
            key = (tx['date'], tx['description'], tx['amount'])
            if key not in seen:
                seen.add(key)
                unique_transactions.append(tx)

        print(f"✅ Total ICICI transactions found: {len(unique_transactions)}")
        return unique_transactions


    def _parse_axis(self, text: str, pdf_path: str) -> CreditCardData:
        """Axis Bank statement parser (earlier, per-field variant).

        NOTE(review): a second ``_parse_axis`` is defined further down in this
        class body. The later definition replaces this one when the class is
        created, so this variant is not the one that runs.

        Each field is delegated to its own ``_extract_axis_*`` helper, called
        in the order listed below. ``pdf_path`` is not used.

        Args:
            text: Extracted statement text.
            pdf_path: Path of the source PDF (unused).

        Returns:
            A CreditCardData instance with ``bank_name`` set to 'Axis Bank'.
        """
        print("🔧 Using Axis Bank parser (Fixed)...")

        # Keyword arguments are evaluated top to bottom, preserving the
        # extractor call order.
        return CreditCardData(
            bank_name='Axis Bank',
            cardholder_name=self._extract_axis_name(text),
            card_last_4=self._extract_axis_card_last_4(text),
            statement_date=self._extract_axis_statement_date(text),
            payment_due_date=self._extract_axis_due_date(text),
            total_amount_due=self._extract_axis_total_due(text),
            minimum_amount_due=self._extract_axis_min_due(text),
            credit_limit=self._extract_axis_credit_limit(text),
            available_credit=self._extract_axis_available_credit(text),
            transactions=self._extract_axis_transactions(text),
        )

    # ========== Axis Bank Field Extractors ==========

    # ===================== AXIS BANK PARSER (REPLACEMENT) =====================

    def _parse_axis(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a CreditCardData record from Axis Bank statement text.

        Name and card digits come from their own extractors, the two dates
        and two due amounts from the payment-summary extractor, and both
        limits from the limits extractor. Missing dates become "Not Found".
        ``pdf_path`` is not used.

        Args:
            text: Extracted statement text.
            pdf_path: Path of the source PDF (unused).

        Returns:
            A CreditCardData instance with ``bank_name`` set to 'Axis Bank'.
        """
        print("🔧 Using Axis Bank parser (Robust Enhanced)...")

        holder = self._extract_axis_name(text)
        last_four = self._extract_axis_card_last_4(text)
        summary = self._extract_axis_payment_summary(text)
        stmt_date, due_date, total_due, min_due = summary
        limit, available = self._extract_axis_limits(text)
        tx_list = self._extract_axis_transactions(text)

        fields = {
            'bank_name': 'Axis Bank',
            'cardholder_name': holder,
            'card_last_4': last_four,
            # The summary extractor yields None for a date it could not find
            'statement_date': stmt_date or "Not Found",
            'payment_due_date': due_date or "Not Found",
            'total_amount_due': total_due,
            'minimum_amount_due': min_due,
            'credit_limit': limit,
            'available_credit': available,
            'transactions': tx_list,
        }
        return CreditCardData(**fields)

    # ---------------- Axis Extractors ----------------

    def _extract_axis_name(self, text: str) -> str:
        print("🔍 Extracting Axis cardholder name...")
        # Common pattern: uppercase name before address
        match = re.search(r'\n([A-Z][A-Z\s,.-]+)\nB/', text)
        if match:
            name = re.sub(r'\s+', ' ', match.group(1).strip())
            print(f"✅ Found Axis name: '{name}'")
            return name

        # Fallback: first uppercase line that looks like a name
        for line in text.splitlines():
            ln = line.strip()
            if ln.isupper() and len(ln.split()) >= 2 and len(ln) < 60:
                if not any(k in ln for k in ['AXIS', 'STATEMENT', 'PAYMENT', 'SUMMARY']):
                    print(f"✅ Found Axis name (fallback): '{ln}'")
                    return re.sub(r'\s+', ' ', ln)
        print("❌ Name not found")
        return "Not Found"

    def _extract_axis_card_last_4(self, text: str) -> str:
        print("🔍 Extracting Axis card last 4 digits...")
        match = re.search(r'(\d{6}\*{6}(\d{4}))|(\*{6}(\d{4}))', text)
        if match:
            for g in match.groups():
                if g and re.fullmatch(r'\d{4}', g):
                    print(f"✅ Found Axis card last 4: {g}")
                    return g
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_axis_payment_summary(self, text: str):
        """Extract statement date, due date, total/min due from Payment Summary block"""
        print("🔍 Extracting Axis payment summary...")
        section = text[text.upper().find('PAYMENT SUMMARY'):text.upper().find('AUTO-DEBIT') + 300]

        # Find all dates and Dr amounts
        dates = re.findall(r'(\d{2}/\d{2}/\d{4})', section)
        amounts = re.findall(r'([\d\s,]+\.\d{2})\s*Dr', section, re.IGNORECASE)

        statement_date = None
        payment_due_date = None
        total_due = 0.0
        min_due = 0.0

        if len(dates) >= 2:
            statement_date = dates[1]
        if len(dates) >= 3:
            payment_due_date = dates[2]

        if amounts:
            total_due = self._clean_amount_to_float(amounts[0])
            if len(amounts) > 1:
                min_due = self._clean_amount_to_float(amounts[1])
            else:
                min_due = total_due

        print(f"✅ Summary found: Statement={statement_date}, Due={payment_due_date}, Total={total_due}, Min={min_due}")
        return statement_date, payment_due_date, total_due, min_due

    def _extract_axis_limits(self, text: str):
        """Extract Credit Limit and Available Credit"""
        print("🔍 Extracting Axis credit limits...")
        # Example: 33467******7381 132,000.00 30,641.86 30,641.86
        match = re.search(r'\*{4,}\d{4}\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})', text)
        if match:
            credit_limit = self._clean_amount_to_float(match.group(1))
            available_credit = self._clean_amount_to_float(match.group(2))
            print(f"✅ Found limits: Credit={credit_limit}, Available={available_credit}")
            return credit_limit, available_credit
        print("❌ Credit limits not found")
        return 0.0, 0.0

    def _clean_amount_to_float(self, s: str) -> float:
        """Utility to clean currency strings"""
        if not s:
            return 0.0
        cleaned = re.sub(r'[^\d.\-]', '', s)
        try:
            return float(cleaned)
        except:
            return 0.0

    def _extract_axis_transactions(self, text: str) -> List[Dict]:
        """Enhanced Axis transaction extractor (multi-line + missing dates)"""
        print("🔍 Extracting Axis transactions (enhanced)...")
        transactions = []

        # Normalize text
        text = text.replace('\r', '').replace('\t', ' ')
        text = re.sub(r' {2,}', ' ', text)

        # Regex for transactions with date + Dr/Cr
        tx_pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d\s,]+\.\d{2})\s*(Dr|Cr)\b',
            re.MULTILINE | re.IGNORECASE
        )

        # 1️⃣ Match direct transactions
        for match in tx_pattern.finditer(text):
            date, desc, amt_str, drcr = match.groups()
            amount = self._clean_amount_to_float(amt_str)
            if drcr.lower() == 'cr':
                amount = -amount
            transactions.append({
                'date': date,
                'description': desc.strip(),
                'amount': amount
            })

        # 2️⃣ Handle continuation lines (GST, EMI, etc.)
        last_date = None
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            # detect date
            m_date = re.match(r'(\d{2}/\d{2}/\d{4})', line)
            if m_date:
                last_date = m_date.group(1)
                continue
            # detect Dr/Cr line without date
            m_amount = re.search(r'([\d\s,]+\.\d{2})\s*(Dr|Cr)\b', line, re.IGNORECASE)
            if m_amount and last_date:
                amt = self._clean_amount_to_float(m_amount.group(1))
                if m_amount.group(2).lower() == 'cr':
                    amt = -amt
                desc = re.sub(r'([\d\s,]+\.\d{2})\s*(Dr|Cr)\b', '', line).strip()
                if desc:
                    transactions.append({'date': last_date, 'description': desc, 'amount': amt})

        # 3️⃣ Remove duplicates
        seen = set()
        unique = []
        for tx in transactions:
            key = (tx['date'], tx['description'][:30], tx['amount'])
            if key not in seen:
                seen.add(key)
                unique.append(tx)

        print(f"✅ Total Axis transactions found: {len(unique)}")
        return unique

    # ===== IDFC FIRST BANK PARSER =====
    # IDFC-specific parser entry point and field extractors follow.

    def _parse_idfc(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a CreditCardData record from IDFC FIRST statement text.

        Name and card digits come from their own extractors; dates, dues and
        limits come from extractors that return several values at once. The
        cash limit returned by the limits extractor is not stored, and
        ``pdf_path`` is not used.

        Args:
            text: Extracted statement text.
            pdf_path: Path of the source PDF (unused).

        Returns:
            A CreditCardData instance with ``bank_name`` set to 'IDFC First'.
        """
        print("🔧 Using IDFC FIRST Bank parser (Fixed & Enhanced)...")

        holder = self._extract_idfc_name(text)
        last_four = self._extract_idfc_card_last_4(text)
        stmt_date, due_date = self._extract_idfc_dates(text)
        total_due, min_due = self._extract_idfc_dues(text)
        # Third element is the cash limit, which CreditCardData has no field for
        limit, available, _cash_limit = self._extract_idfc_limits(text)
        tx_list = self._extract_idfc_transactions(text)

        fields = {
            'bank_name': 'IDFC First',
            'cardholder_name': holder,
            'card_last_4': last_four,
            'statement_date': stmt_date,
            'payment_due_date': due_date,
            'total_amount_due': total_due,
            'minimum_amount_due': min_due,
            'credit_limit': limit,
            'available_credit': available,
            'transactions': tx_list,
        }
        return CreditCardData(**fields)

    def _extract_idfc_name(self, text: str) -> str:
        print("🔍 Extracting IDFC cardholder name...")

        # Pattern 1: Name at the top before address
        match = re.search(r'^([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\n\s*Credit Card Statement', text, re.MULTILINE)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found IDFC name: '{name}'")
            return name

        # Pattern 2: Customer Name in transactions section (FIXED - remove "Card Number" suffix)
        match = re.search(r'Customer Name\s*:\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)', text)
        if match:
            name = match.group(1).strip()
            # Remove any trailing "Card Number" text
            name = re.sub(r'\s+Card\s+Number.*$', '', name, flags=re.IGNORECASE)
            print(f"✅ Found IDFC name (pattern 2): '{name}'")
            return name

        # Pattern 3: Name before "Credit Card Statement" (more flexible)
        match = re.search(r'\n([A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+){1,3})\s*\n.*?Credit Card Statement', text, re.DOTALL)
        if match:
            name = match.group(1).strip()
            # Validate it's not a keyword
            if not any(kw in name.upper() for kw in ['ALWAYS', 'FIRST', 'BANK', 'STATEMENT', 'CARD NUMBER']):
                print(f"✅ Found IDFC name (pattern 3): '{name}'")
                return name

        print("❌ Cardholder name not found in IDFC statement")
        return "Not Found"

    def _extract_idfc_card_last_4(self, text: str) -> str:
        print("🔍 Extracting IDFC card last 4 digits...")

        # Pattern 1: Standard masked format
        match = re.search(r'(\d{6}\*{6}(\d{4}))|(\*{6}(\d{4}))', text)
        if match:
            for g in match.groups():
                if g and re.fullmatch(r'\d{4}', g):
                    print(f"✅ Found IDFC card last 4: {g}")
                    return g

        # Pattern 2: Card Number with asterisks
        match = re.search(r'Card Number\s*:?\s*\d+\*+(\d{4})', text)
        if match:
            last4 = match.group(1)
            print(f"✅ Found IDFC card last 4 (pattern 2): {last4}")
            return last4

        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_idfc_dates(self, text: str) -> tuple:
        print("🔍 Extracting IDFC statement and due dates...")

        # Pattern 1: Statement Date row with two dates
        # "Statement Date\n24/08/2021 11/09/2021"
        match = re.search(r'Statement\s+Date\s*\n\s*(\d{2}/\d{2}/\d{4})\s+(\d{2}/\d{2}/\d{4})', text, re.IGNORECASE)
        if match:
            s_date, d_date = match.groups()
            print(f"✅ Found statement date: {s_date}, due date: {d_date}")
            return s_date, d_date

        # Pattern 2: Look for "Payment Due Date" separately
        stmt_match = re.search(r'Statement\s+Date\s*\n\s*(\d{2}/\d{2}/\d{4})', text, re.IGNORECASE)
        due_match = re.search(r'Payment\s+Due\s+Date\s*\n\s*(\d{2}/\d{2}/\d{4})', text, re.IGNORECASE)

        s_date = stmt_match.group(1) if stmt_match else "Not Found"
        d_date = due_match.group(1) if due_match else "Not Found"

        if s_date != "Not Found" or d_date != "Not Found":
            print(f"✅ Found statement date: {s_date}, due date: {d_date}")
            return s_date, d_date

        # Pattern 3: Two dates on same line
        match = re.search(r'(\d{2}/\d{2}/\d{4})\s+(\d{2}/\d{2}/\d{4})', text)
        if match:
            s_date, d_date = match.groups()
            print(f"✅ Found dates (pattern 3): statement={s_date}, due={d_date}")
            return s_date, d_date

        print("❌ Dates not found")
        return "Not Found", "Not Found"

    def _extract_idfc_dues(self, text: str) -> tuple:
        print("🔍 Extracting IDFC total and minimum amount due...")

        # Pattern 1: Table format with headers and values on next line
        # "Total Amount Due Minimum Amount Due\nr 29,147.25 r 1,457.36"
        match = re.search(
            r'Total\s+Amount\s+Due\s+Minimum\s+Amount\s+Due\s*\n\s*r?\s*([\d,]+\.?\d*)\s+r?\s*([\d,]+\.?\d*)',
            text, re.IGNORECASE
        )
        if match:
            try:
                total = float(match.group(1).replace(',', ''))
                minimum = float(match.group(2).replace(',', ''))
                print(f"✅ Found total due: {total}, minimum due: {minimum}")
                return total, minimum
            except ValueError:
                pass

        # Pattern 2: Headers and values with currency symbol
        match = re.search(
            r'Total\s+Amount\s+Due.*?Minimum\s+Amount\s+Due.*?\n.*?[r₹]\s*([\d,]+\.?\d*).*?[r₹]\s*([\d,]+\.?\d*)',
            text, re.IGNORECASE | re.DOTALL
        )
        if match:
            try:
                total = float(match.group(1).replace(',', ''))
                minimum = float(match.group(2).replace(',', ''))
                print(f"✅ Found total due (pattern 2): {total}, minimum due: {minimum}")
                return total, minimum
            except ValueError:
                pass

        # Pattern 3: Look for labels individually as fallback
        total = 0.0
        minimum = 0.0

        total_match = re.search(r'Total\s+Amount\s+Due\s*:?\s*[r₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if total_match:
            try:
                total = float(total_match.group(1).replace(',', ''))
            except ValueError:
                pass

        min_match = re.search(r'Minimum\s+Amount\s+Due\s*:?\s*[r₹]?\s*([\d,]+\.?\d*)', text, re.IGNORECASE)
        if min_match:
            try:
                minimum = float(min_match.group(1).replace(',', ''))
            except ValueError:
                pass

        if total > 0 or minimum > 0:
            print(f"✅ Found dues (fallback): total={total}, minimum={minimum}")
            return total, minimum

        print("❌ Dues not found")
        return 0.0, 0.0

    def _extract_idfc_limits(self, text: str) -> tuple:
        print("🔍 Extracting IDFC credit, available and cash limits...")

        credit = 0.0
        available = 0.0
        cash = 0.0

        # Debug: Print the section we're looking at
        limit_section_match = re.search(r'(Credit\s+Limit.*?Cash\s+Limit.*?r\s*[\d,]+)', text, re.IGNORECASE | re.DOTALL)
        if limit_section_match:
            print(f"📋 DEBUG - Found limits section:\n{limit_section_match.group(0)[:200]}")

        # Pattern 1: Most flexible - find all numbers in the limits section
        limits_block = re.search(
            r'Credit\s+Limit\s+Available\s+Credit\s+Limit.*?Cash\s+Limit',
            text, re.IGNORECASE | re.DOTALL
        )

        if limits_block:
            block_text = limits_block.group(0)
            # Find all amounts (with 'r' prefix and comma formatting)
            amounts = re.findall(r'r\s*([\d,]+(?:\.\d+)?)', block_text, re.IGNORECASE)

            print(f"📊 DEBUG - Found amounts in block: {amounts}")

            if len(amounts) >= 3:
                try:
                    credit = float(amounts[0].replace(',', ''))
                    available = float(amounts[1].replace(',', ''))
                    cash = float(amounts[2].replace(',', ''))
                    print(f"✅ Found limits (pattern 1): Credit={credit}, Available={available}, Cash={cash}")
                    return credit, available, cash
                except (ValueError, IndexError):
                    pass

        # Pattern 2: Try line-by-line approach
        lines = text.split('\n')
        for i, line in enumerate(lines):
            if 'Credit Limit' in line and 'Available Credit Limit' in line and i + 1 < len(lines):
                next_line = lines[i + 1]
                # Extract two numbers from the next line
                nums = re.findall(r'r?\s*([\d,]+(?:\.\d+)?)', next_line)
                if len(nums) >= 2:
                    try:
                        credit = float(nums[0].replace(',', ''))
                        available = float(nums[1].replace(',', ''))
                        print(f"✅ Found credit and available (pattern 2): {credit}, {available}")
                    except ValueError:
                        pass

            if 'Cash Limit' in line and i + 1 < len(lines):
                next_line = lines[i + 1]
                nums = re.findall(r'r?\s*([\d,]+(?:\.\d+)?)', next_line)
                if nums:
                    try:
                        cash = float(nums[0].replace(',', ''))
                        print(f"✅ Found cash (pattern 2): {cash}")
                    except ValueError:
                        pass

        # Pattern 3: Brute force - find ALL numbers near the keywords
        if credit == 0.0:
            credit_section = re.search(
                r'Credit\s+Limit.*?(?:Available|Cash|\n\n)',
                text, re.IGNORECASE | re.DOTALL
            )
            if credit_section:
                nums = re.findall(r'(?:r|₹)?\s*([\d,]+(?:\.\d+)?)', credit_section.group(0))
                for n in nums:
                    try:
                        val = float(n.replace(',', ''))
                        if 10000 <= val <= 10000000:  # Reasonable credit limit range
                            credit = val
                            print(f"✅ Found credit (pattern 3): {credit}")
                            break
                    except ValueError:
                        continue

        if available == 0.0:
            avail_section = re.search(
                r'Available\s+Credit\s+Limit.*?(?:Cash|\n\n)',
                text, re.IGNORECASE | re.DOTALL
            )
            if avail_section:
                nums = re.findall(r'(?:r|₹)?\s*([\d,]+(?:\.\d+)?)', avail_section.group(0))
                for n in nums:
                    try:
                        val = float(n.replace(',', ''))
                        if 0 < val <= 10000000:
                            available = val
                            print(f"✅ Found available (pattern 3): {available}")
                            break
                    except ValueError:
                        continue

        if cash == 0.0:
            cash_section = re.search(
                r'Cash\s+Limit.*?(?:\n\n|STATEMENT)',
                text, re.IGNORECASE | re.DOTALL
            )
            if cash_section:
                nums = re.findall(r'(?:r|₹)?\s*([\d,]+(?:\.\d+)?)', cash_section.group(0))
                for n in nums:
                    try:
                        val = float(n.replace(',', ''))
                        if 1000 <= val <= 1000000:
                            cash = val
                            print(f"✅ Found cash (pattern 3): {cash}")
                            break
                    except ValueError:
                        continue

        print(f"✅ Final limits → Credit: {credit}, Available: {available}, Cash: {cash}")
        return credit, available, cash

    def _extract_idfc_transactions(self, text: str) -> List[Dict]:
        print("🔍 Extracting IDFC transactions...")
        transactions = []

        # Find the transactions section
        tx_section_match = re.search(r'YOUR\s+TRANSACTIONS.*?(?=KEY\s+OFFERS|Page\s+\d+|$)', text, re.IGNORECASE | re.DOTALL)

        if not tx_section_match:
            print("❌ Transactions section not found")
            return transactions

        tx_text = tx_section_match.group(0)

        # Pattern for IDFC transactions (FIXED - handle both formats):
        # 1. DD/MM/YYYY DESCRIPTION AMOUNT
        # 2. DD/MM/YYYY DESCRIPTION AMOUNT CR
        tx_pattern = re.compile(
            r'(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,]+\.?\d*)\s*(CR)?(?:\s*\n|$)',
            re.MULTILINE | re.IGNORECASE
        )

        for match in tx_pattern.finditer(tx_text):
            date = match.group(1)
            desc = match.group(2).strip()
            amt_str = match.group(3)
            is_credit = match.group(4) is not None

            # Skip header lines
            if any(kw in desc.upper() for kw in ['TRANSACTION', 'DATE', 'DETAILS', 'AMOUNT', 'CUSTOMER NAME', 'CARD NUMBER']):
                continue

            try:
                amt = float(amt_str.replace(',', ''))

                # Credits are negative
                if is_credit:
                    amt = -amt

                transactions.append({
                    'date': date,
                    'description': desc,
                    'amount': amt
                })

                cr_marker = " (CR)" if is_credit else ""
                print(f"✅ Found transaction: {date} - {desc[:30]}... - {amt}{cr_marker}")
            except ValueError:
                continue

        # Remove duplicates
        seen = set()
        unique_transactions = []
        for tx in transactions:
            key = (tx['date'], tx['description'][:30], tx['amount'])
            if key not in seen:
                seen.add(key)
                unique_transactions.append(tx)

        print(f"✅ Total IDFC transactions found: {len(unique_transactions)}")
        return unique_transactions

        # ===== INDIAN BANK PARSER (FULL DYNAMIC IMPLEMENTATION) =====
    def _parse_indian_bank(self, text: str, pdf_path: str) -> CreditCardData:
        """Assemble a CreditCardData record for an Indian Bank statement.

        Runs each Indian Bank extractor on ``text`` and packs the results.
        The statement period and the cash limit are extracted but are not
        part of the returned record. ``pdf_path`` is not used here.

        Args:
            text: Statement text.
            pdf_path: Path of the source PDF (unused).

        Returns:
            The populated CreditCardData; empty dates become "Not Found".
        """
        print("🔧 Using Indian Bank parser (Dynamic)...")

        name = self._extract_indian_name(text)
        last_four = self._extract_indian_card_last_4(text)
        stmt_date, _period, due_date = self._extract_indian_dates(text)
        total_due, minimum_due = self._extract_indian_dues(text)
        limit, available, _cash_limit = self._extract_indian_limits(text)
        rows = self._extract_indian_transactions(text)

        fields = {
            'bank_name': 'Indian Bank',
            'cardholder_name': name,
            'card_last_4': last_four,
            'statement_date': stmt_date or "Not Found",
            'payment_due_date': due_date or "Not Found",
            'total_amount_due': total_due,
            'minimum_amount_due': minimum_due,
            'credit_limit': limit,
            'available_credit': available,
            'transactions': rows,
        }
        return CreditCardData(**fields)

    # ---------------- INDIAN BANK EXTRACTORS ----------------

    def _extract_indian_name(self, text: str) -> str:
        print("🔍 Extracting Indian Bank cardholder name...")
        match = re.search(r'Mr\.?\s+([A-Z][A-Za-z\s]+)', text)
        if match:
            name = match.group(1).strip()
            print(f"✅ Found cardholder name: '{name}'")
            return name
        # fallback: first line in uppercase before address
        for line in text.splitlines():
            if line.strip().isupper() and len(line.strip().split()) >= 2:
                print(f"✅ Found name (fallback): '{line.strip()}'")
                return line.strip()
        print("❌ Name not found")
        return "Not Found"

    def _extract_indian_card_last_4(self, text: str) -> str:
        print("🔍 Extracting Indian Bank card last 4 digits...")
        match = re.search(r'(\d{4})\s*\d{2}XX\s*XXXX\s*(\d{4})', text)
        if match:
            last4 = match.group(2)
            print(f"✅ Found card last 4: {last4}")
            return last4
        match = re.search(r'XXXX\s*(\d{4})', text)
        if match:
            last4 = match.group(1)
            print(f"✅ Found card last 4 (pattern 2): {last4}")
            return last4
        print("❌ Card last 4 digits not found")
        return "Not Found"

    def _extract_indian_dates(self, text: str) -> tuple:
        print("🔍 Extracting statement date, period, and payment due date...")
        # Match the line with the date block: "4328 09XX XXXX 7471 20-10-16 21-09-16 - 20-10-16 04-11-16"
        match = re.search(r'(\d{2}-\d{2}-\d{2})\s+(\d{2}-\d{2}-\d{2})\s*-\s*(\d{2}-\d{2}-\d{2})\s+(\d{2}-\d{2}-\d{2})', text)
        if match:
            statement_date = match.group(1)
            statement_period = f"{match.group(2)} - {match.group(3)}"
            due_date = match.group(4)
            print(f"✅ Found dates: statement={statement_date}, period={statement_period}, due={due_date}")
            return statement_date, statement_period, due_date
        print("❌ Dates not found")
        return "Not Found", "Not Found", "Not Found"

    def _extract_indian_dues(self, text: str) -> tuple:
        print("🔍 Extracting Indian Bank total and minimum amount due (Enhanced)...")

        # --- Priority 1: Search for the line near cardholder name (most reliable)
        # Example: "4328 09XX XXXX 7471 2510216 21469.29 1074.00"
        pattern_primary = r'\d{4}\s+\d{2}XX\s+XXXX\s+\d{4}.*?\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})'
        match = re.search(pattern_primary, text)
        if match:
            try:
                total = float(match.group(1).replace(',', ''))
                minimum = float(match.group(2).replace(',', ''))
                print(f"✅ Found dues (primary): Total={total}, Minimum={minimum}")
                return total, minimum
            except ValueError:
                pass

        # --- Priority 2: Look for the last occurrence of two adjacent decimals (fallback)
        # Example: 21469.29 1074.00
        matches = re.findall(r'([\d,]+\.\d{2})\s+([\d,]+\.\d{2})', text)
        if matches:
            # Take the last valid pair (end of statement)
            last_pair = matches[-1]
            try:
                total = float(last_pair[0].replace(',', ''))
                minimum = float(last_pair[1].replace(',', ''))
                print(f"✅ Found dues (fallback last pair): Total={total}, Minimum={minimum}")
                return total, minimum
            except ValueError:
                pass

        print("❌ Dues not found")
        return 0.0, 0.0

    def _extract_indian_limits(self, text: str) -> tuple:
        print("🔍 Extracting Indian Bank credit limits...")
        # Example: "100,000.00 78,530.71 40,000.00 40,000.00"
        match = re.search(r'([\d,]+\.\d{2})\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})', text)
        if match:
            try:
                credit_limit = float(match.group(1).replace(',', ''))
                available_credit = float(match.group(2).replace(',', ''))
                cash_limit = float(match.group(3).replace(',', ''))
                print(f"✅ Found limits: Credit={credit_limit}, Available={available_credit}, Cash={cash_limit}")
                return credit_limit, available_credit, cash_limit
            except ValueError:
                pass
        print("❌ Credit limits not found")
        return 0.0, 0.0, 0.0

    def _extract_indian_transactions(self, text: str) -> List[Dict]:
        print("🔍 Extracting Indian Bank transactions...")
        transactions = []

        # Find transaction block
        match = re.search(r'Txn\.\s*Date\s*Transaction Particulars.*?(?=CONTACT|Mr\.|Page|\Z)', text, re.DOTALL | re.IGNORECASE)
        if not match:
            print("❌ Transactions section not found")
            return transactions

        tx_text = match.group(0)

        tx_pattern = re.compile(
            r'(\d{2}-[A-Z]{3}-\d{2})\s+(.+?)\s+(Cr|Dr)\s+([\d,]+\.\d{2})',
            re.IGNORECASE
        )

        for m in tx_pattern.finditer(tx_text):
            date = m.group(1)
            desc = m.group(2).strip()
            crdr = m.group(3).strip().lower()
            amount = float(m.group(4).replace(',', ''))
            if crdr == 'cr':
                amount = -amount
            transactions.append({
                'date': date,
                'description': desc,
                'amount': amount
            })
            print(f"✅ Found transaction: {date} - {desc[:30]}... - {amount}")

        print(f"✅ Total transactions found: {len(transactions)}")
        return transactions

    def _parse_generic(self, text: str, pdf_path: str) -> CreditCardData:
        """Fallback parser for statements from an unrecognised bank.

        No extraction is implemented yet: ``text`` and ``pdf_path`` are
        ignored and a record of default values tagged 'UNKNOWN' is returned.
        """
        print("🔧 Using generic parser...")
        # Placeholder until generic extraction logic exists
        placeholder_fields = self._get_default_data('UNKNOWN')
        return CreditCardData(**placeholder_fields)

    def _get_default_data(self, bank_name: str) -> Dict:
        """Get default data structure for banks without specific parsers"""
        return {
            'bank_name': bank_name,
            'cardholder_name': "Not Found",
            'card_last_4': "Not Found",
            'statement_date': "Not Found",
            'payment_due_date': "Not Found",
            'total_amount_due': 0.0,
            'minimum_amount_due': 0.0,
            'credit_limit': 0.0,
            'available_credit': 0.0,
            'transactions': []
        }

class StatementAnalyzer:
    """Console reporting for a parsed credit card statement."""

    @staticmethod
    def display_summary(data: CreditCardData):
        """Print the statement header fields, then the transaction table if any.

        Args:
            data: Parsed statement; its attributes are read, never modified.
        """
        banner = "=" * 60
        print("\n" + banner)
        print("💳 CREDIT CARD STATEMENT SUMMARY")
        print(banner)
        print(f"🏦 Bank: {data.bank_name}")
        print(f"👤 Cardholder: {data.cardholder_name}")
        print(f"🔢 Card Number: **** **** **** {data.card_last_4}")
        print(f"📅 Statement Date: {data.statement_date}")
        print(f"⏰ Payment Due Date: {data.payment_due_date}")
        print(f"💰 Total Amount Due: ₹{data.total_amount_due:,.2f}")
        print(f"💸 Minimum Amount Due: ₹{data.minimum_amount_due:,.2f}")
        print(f"🎯 Credit Limit: ₹{data.credit_limit:,.2f}")
        print(f"💳 Available Credit: ₹{data.available_credit:,.2f}")
        print(f"📊 Transactions Count: {len(data.transactions)}")
        print(banner)

        if data.transactions:
            StatementAnalyzer._display_transactions(data.transactions)

    @staticmethod
    def _display_transactions(rows):
        """Print one line per transaction followed by debit and credit totals."""
        divider = "-" * 80
        print("\n📋 TRANSACTIONS:")
        print(divider)
        print(f"{'Date':<12} {'Description':<45} {'Amount':>15}")
        print(divider)

        for tx in rows:
            desc = tx['description']
            if len(desc) > 42:
                # Shorten long descriptions so they fit the 45-character column
                desc = desc[:42] + "..."
            # Negative amounts are credits and get a leading minus sign
            amount_prefix = "- ₹" if tx['amount'] < 0 else "₹"
            amount_display = abs(tx['amount'])
            print(f"{tx['date']:<12} {desc:<45} {amount_prefix}{amount_display:>12,.2f}")

        print(divider)
        total_debits = sum(tx['amount'] for tx in rows if tx['amount'] > 0)
        total_credits = sum(abs(tx['amount']) for tx in rows if tx['amount'] < 0)
        print(f"{'Total Debits:':<57} ₹{total_debits:>12,.2f}")
        print(f"{'Total Credits:':<57} - ₹{total_credits:>12,.2f}")

# ===== MAIN EXECUTION =====
def main():
    """Upload statement PDFs in Google Colab, parse each one and print a summary.

    Each uploaded file is written to the working directory, parsed with
    CreditCardParser and displayed with StatementAnalyzer. A failure on one
    file is reported with its traceback and does not stop the remaining
    files. The on-disk copy is removed afterwards whether or not processing
    succeeded.
    """
    # Local imports: google.colab exists only inside a Colab runtime.
    from google.colab import files
    import traceback

    parser = CreditCardParser()
    analyzer = StatementAnalyzer()

    print("🚀 MULTI-BANK CREDIT CARD STATEMENT PARSER (FIXED)")
    print("Supported Banks: HDFC, ICICI, Axis, IDFC First, Indian Bank")
    print("="*60)
    print("📤 Upload your credit card statement PDFs...")

    # Upload PDF files
    uploaded = files.upload()

    all_statements = []

    for filename, content in uploaded.items():
        print(f"\n🔍 Processing: {filename}")

        with open(filename, 'wb') as f:
            f.write(content)

        try:
            statement_data = parser.parse_statement(filename)
            all_statements.append(statement_data)
            analyzer.display_summary(statement_data)

        except Exception as e:
            # Top-level boundary: report and carry on with the next file.
            print(f"❌ Failed to process {filename}: {str(e)}")
            traceback.print_exc()

        finally:
            # Clean up on failure too, so a bad PDF is not left on disk.
            if os.path.exists(filename):
                os.remove(filename)

    print(f"\n🎯 Processing complete! Successfully parsed {len(all_statements)} statement(s).")



In [28]:
main()


🚀 MULTI-BANK CREDIT CARD STATEMENT PARSER (FIXED)
Supported Banks: HDFC, ICICI, Axis, IDFC First, Indian Bank
📤 Upload your credit card statement PDFs...


Saving 434426984-credit-card-oct-16-pdf.pdf to 434426984-credit-card-oct-16-pdf.pdf

🔍 Processing: 434426984-credit-card-oct-16-pdf.pdf
🔍 Identified Bank: Indian Bank
🔧 Using Indian Bank parser (Dynamic)...
🔍 Extracting Indian Bank cardholder name...
✅ Found cardholder name: 'RAMASAMY PRABATH'
🔍 Extracting Indian Bank card last 4 digits...
✅ Found card last 4: 7471
🔍 Extracting statement date, period, and payment due date...
✅ Found dates: statement=20-10-16, period=21-09-16 - 20-10-16, due=04-11-16
🔍 Extracting Indian Bank total and minimum amount due (Enhanced)...
✅ Found dues (primary): Total=21469.29, Minimum=1074.0
🔍 Extracting Indian Bank credit limits...
✅ Found limits: Credit=100000.0, Available=78530.71, Cash=40000.0
🔍 Extracting Indian Bank transactions...
✅ Found transaction: 20-SEP-16 - PREVIOUS STATEMENT OUTSTANDING... - 14760.28
✅ Found transaction: 25-SEP-16 - ROSHAN CHENNAI IN,626913628143... - 5249.0
✅ Found transaction: 03-OCT-16 - PAYMENT RECEIVED-THANK YOU... - -147