In [6]:
# Cell 1: Import libraries
import os
import json
import base64
from faker import Faker
from datetime import datetime
import random
import pandas as pd
import ollama
from jinja2 import Environment, FileSystemLoader
import pdfkit
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from lxml import html
import time

# Shared Faker instance; generate_synthetic_data uses it for the fake name, address and account number.
fake = Faker()


In [12]:
# Cell 1: Import libraries
import base64
import glob
import json
import os
import random
import re
import shutil
import time
from datetime import datetime, timedelta

import ollama
import pandas as pd
import pdfkit
from faker import Faker
from jinja2 import Environment, FileSystemLoader
from lxml import html
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

# Shared Faker instance; generate_synthetic_data uses it for the fake name, address and account number.
fake = Faker()

# Cell 2: Directory setup and bank configuration
# Working directories, all relative to the current working directory.
INPUT_IMAGES_DIR = "input_images"      # statement images to analyse
TEMPLATES_DIR = "templates"            # generated HTML/Jinja templates
OUTPUT_DIR = "output_statements"       # rendered HTML and PDF statements
SAMPLE_LOGOS_DIR = "sample_logos"      # bank logo image files

# Create whichever of the directories is missing; existing ones are left alone.
for directory in (INPUT_IMAGES_DIR, TEMPLATES_DIR, OUTPUT_DIR, SAMPLE_LOGOS_DIR):
    os.makedirs(directory, exist_ok=True)

def _bank_entry(logo: str, personal: str, business: str) -> dict:
    """Build one BANK_CONFIG record: the logo file name plus the account product names."""
    return {
        "logo": logo,
        "account_types": {"personal": personal, "business": business},
    }


# Per-bank settings keyed by lower-case bank identifier. "unknown" is the
# generic entry used when a statement's bank is not one of the others.
BANK_CONFIG = {
    "chase": _bank_entry("chase_bank_logo.png", "Total Checking", "Business Complete Checking"),
    "citibank": _bank_entry("citibank_logo.png", "Access Checking", "Business Checking"),
    "wellsfargo": _bank_entry("wellsfargo_logo.png", "Everyday Checking", "Initiate Business Checking"),
    "pnc": _bank_entry("pnc_logo.png", "Standard Checking", "Business Checking"),
    "unknown": _bank_entry("generic_bank_logo.png", "Personal Checking", "Business Checking"),
}

# Report (without failing) every configured logo that is missing on disk.
for bank, config in BANK_CONFIG.items():
    logo_path = os.path.join(SAMPLE_LOGOS_DIR, config["logo"])
    if os.path.exists(logo_path):
        continue
    print(f"Logo file not found for {bank}: {logo_path}, continuing without validation")

# Cell 3: Extract template structure
def _parse_json_response(raw_response: str) -> dict:
    """Decode the JSON object contained in a model reply.

    The reply may be bare JSON, JSON wrapped in a Markdown code fence, or a
    fenced/bare object surrounded by conversational text. The first fenced
    section is preferred; the text is then narrowed to the outermost
    ``{ ... }`` span before decoding.

    Raises:
        json.JSONDecodeError: if no decodable JSON is found.
        ValueError: if the decoded JSON is not an object.
    """
    text = raw_response.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1).strip()
    # Drop anything before the first "{" or after the last "}" (chatter,
    # an unterminated fence, a stray language tag).
    first, last = text.find("{"), text.rfind("}")
    if first != -1 and last > first:
        text = text[first:last + 1]
    data = json.loads(text)
    if not isinstance(data, dict):
        raise ValueError(f"Expected a JSON object, got {type(data).__name__}")
    return data


def _request_structure(image_path: str, prompt: str, log_label: str) -> dict:
    """Send the image and prompt to the vision model and return the parsed JSON object."""
    with open(image_path, "rb") as img_file:
        response = ollama.generate(
            model="gemma3:4b",
            prompt=prompt,
            images=[img_file.read()]
        )
    raw_response = response['response'].strip()
    print(f"{log_label} for {image_path}: {raw_response}")
    return _parse_json_response(raw_response)


def extract_template_structure(image_path: str) -> dict:
    """Describe a bank statement image as a template structure.

    The image is sent to the ``gemma3:4b`` model with a detailed prompt; if
    that call or the JSON decoding fails, a shorter prompt is tried once.
    If both attempts fail, a built-in default structure is returned, so the
    function only raises when the image itself is missing.

    Args:
        image_path: Path of the statement image.

    Returns:
        A dict with "bank_name", "fields" and "layout" keys when the model
        follows the prompt, or the built-in default with those keys.

    Raises:
        FileNotFoundError: if ``image_path`` does not exist.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    prompt = """
    You are an expert in extracting structured data from bank statement images to create reusable HTML templates. Analyze the provided bank statement image and output a JSON object with:
    - "bank_name": The bank name (e.g., PNC, Chase, Citibank, Wells Fargo) identified from text or logo, or "unknown" if unclear.
    - "fields": List of fields with variable names (e.g., {{account_holder}}, {{account_number}}, {{statement_period}}, {{account_type}}, {{account_holder_address}}, {{deposits}}, {{withdrawals}}, {{summary}}, {{important_info}}, {{logo_path}}, {{statement_start}}, {{statement_end}}, {{balance_map}}, {{show_fee_waiver}}) and their descriptions.
    - "layout": Description of the visual structure (e.g., header with bank name/logo and customer service info, account details section, summary table, deposits and withdrawals tables, daily balance table, footnotes).
    Return only valid JSON, no conversational text, wrapped in ```json\n...\n```. For fields, include variable names for a professional HTML template. For deposits and withdrawals, identify table columns (date, description, amount). Ensure compatibility with any bank statement layout.
    """

    simplified_prompt = """
        Analyze the bank statement image and output a JSON object with:
        - "bank_name": Identify the bank or "unknown".
        - "fields": List key fields with variable names (e.g., {{account_holder}}, {{deposits}}, {{withdrawals}}).
        - "layout": Brief description of the structure.
        Return only valid JSON, no conversational text, wrapped in ```json\n...\n```.
        """

    # (prompt, label for the raw-response log line, prefix of the failure message)
    attempts = (
        (prompt, "Raw Gemma 3 response", "Error extracting structure from"),
        (simplified_prompt, "Retry raw Gemma 3 response", "Retry failed for"),
    )
    for attempt_prompt, log_label, failure_prefix in attempts:
        try:
            return _request_structure(image_path, attempt_prompt, log_label)
        except Exception as e:
            # Best effort: any model, I/O or parsing failure moves on to the next attempt.
            print(f"{failure_prefix} {image_path}: {e}")

    # Both attempts failed: fall back to a generic structure.
    return {
        "bank_name": "unknown",
        "fields": [
            {"name": "account_holder", "variable": "{{account_holder}}", "description": "Name of the account holder"},
            {"name": "account_number", "variable": "{{account_number}}", "description": "Account number"},
            {"name": "statement_period", "variable": "{{statement_period}}", "description": "Statement date range"},
            {"name": "account_type", "variable": "{{account_type}}", "description": "Type of account"},
            {"name": "account_holder_address", "variable": "{{account_holder_address}}", "description": "Account holder's address"},
            {"name": "deposits", "variable": "{{deposits}}", "description": "List of deposit transactions"},
            {"name": "withdrawals", "variable": "{{withdrawals}}", "description": "List of withdrawal transactions"},
            {"name": "summary", "variable": "{{summary}}", "description": "Summary of balances and transaction counts"},
            {"name": "important_info", "variable": "{{important_info}}", "description": "Notices and account information"},
            {"name": "logo_path", "variable": "{{logo_path}}", "description": "Bank logo as base64 data URL"},
            {"name": "statement_start", "variable": "{{statement_start}}", "description": "Start date of statement period"},
            {"name": "statement_end", "variable": "{{statement_end}}", "description": "End date of statement period"},
            {"name": "balance_map", "variable": "{{balance_map}}", "description": "Daily ending balances"},
            {"name": "show_fee_waiver", "variable": "{{show_fee_waiver}}", "description": "Flag for fee waiver notice"}
        ],
        "layout": "Header with bank logo and customer service info, account details section, summary table, deposits and withdrawals tables, daily balance table, footnotes with disclosures."
    }

# Cell 4: Generate HTML template
def _strip_code_fence(text: str) -> str:
    """Return the content of the first Markdown code fence in ``text``, or ``text`` stripped if there is none."""
    fenced = re.search(r"```(?:html)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
    return fenced.group(1).strip() if fenced else text.strip()


def generate_html_template(template_structure: dict, bank: str) -> str:
    """Produce a Jinja-compatible HTML statement template.

    The ``gemma3:4b`` model is asked to write the template from
    ``template_structure``. The reply is accepted only if it parses as HTML
    and as Jinja template source; otherwise (or when the model call fails)
    the built-in fallback template is returned.

    Args:
        template_structure: Structure description, embedded in the prompt as JSON.
        bank: Bank identifier. Not used by the current implementation; kept
            for interface compatibility.

    Returns:
        HTML template source containing ``{{ ... }}`` placeholders.
    """
    # Deliberately a plain string, not an f-string: in an f-string "{{x}}"
    # collapses to "{x}", which would tell the model to emit single-brace
    # placeholders that Jinja does not substitute.
    instructions = """
    You are an expert in HTML/CSS design for bank statements. Using the provided template structure, generate a complete HTML template with placeholders for variables and professional CSS styling to match the described layout. The template should be detailed and professional, suitable for any bank statement, with:
    - Header with {{bank_name}}, {{logo_path}}, and customer service info (e.g., website, phone numbers).
    - Account details for {{account_holder}}, {{account_holder_address}}, {{account_number}}, {{statement_period}}.
    - Important account information section with {{important_info}}.
    - Checking summary table with {{summary}} (fields: beginning_balance, deposits_count, deposits_total, withdrawals_count, withdrawals_total, transactions_count, ending_balance).
    - Deposits table with {{deposits}} (columns: date, description, amount).
    - Withdrawals table with {{withdrawals}} (columns: date, description, amount).
    - Daily balance table with {{statement_start}}, {{statement_end}}, {{balance_map}}.
    - Footnotes section with disclosures.
    - Use professional CSS (Arial font, 12px text, 2px borders, clear section dividers, table layouts).
    Output only the HTML code, no explanations.
    Structure:
    """
    prompt = instructions + json.dumps(template_structure, indent=2) + "\n    "

    try:
        response = ollama.generate(model="gemma3:4b", prompt=prompt)
        # Models often wrap code in a ```html fence; keep only the markup.
        html_content = _strip_code_fence(response['response'])
    except Exception as e:
        print(f"Error generating HTML: {e}")
    else:
        try:
            html.fromstring(html_content)
            # The result is later loaded by Jinja, so reject template syntax
            # errors here instead of failing at render time.
            Environment().parse(html_content)
            return html_content
        except Exception as e:
            print(f"Invalid HTML generated, using fallback template: {e}")

    # Fallback template. Notes on the Jinja used below:
    # - bank_name goes through default(...) so rendering works even when the
    #   caller supplies no bank_name (calling .lower() on an undefined
    #   variable raises).
    # - The running daily balance lives in a namespace object because a plain
    #   {% set %} inside a for loop does not persist across iterations.
    # - balance_map is looked up by the full isoformat() key first and by the
    #   date-only "YYYY-MM-DD" key second.
    # - day_delta is expected to be provided by the rendering environment.
    return """
    <!DOCTYPE html>
    <html lang="en">
    <head>
      <meta charset="UTF-8">
      <title>{{ account_type }} Statement</title>
      <style>
        body { font-family: Arial, sans-serif; margin: 40px; font-size: 12px; }
        .header-table { width: 100%; border-collapse: collapse; margin-bottom: 40px; }
        .header-table td { vertical-align: top; padding: 10px; }
        .customer-service { text-align: right; margin-top: 15px; }
        .cs-box { display: inline-block; text-align: left; padding: 10px; }
        .cs-header { text-transform: uppercase; font-weight: bold; font-size: 12px; border-top: 3px solid #000; border-bottom: 3px solid #000; padding: 5px 10px; margin: 0; width: 100%; box-sizing: border-box; }
        .cs-content { margin-top: 10px; line-height: 1.5; padding-left: 10px; text-align: left; }
        .date-range { font-weight: bold; margin-bottom: 10px; }
        .account-number { margin-bottom: 15px; }
        .section-divider { position: relative; margin: 40px 0 0; }
        .section-header { display: inline-block; border: 2px solid #000; padding: 6px 12px; margin-top: -2px; background: #fff; position: relative; z-index: 1; box-sizing: border-box; min-width: 150px; }
        .section-header h2 { margin: 0; font-size: 14px; font-weight: bold; text-transform: uppercase; white-space: nowrap; }
        .section-divider::after { content: ""; position: absolute; top: 50%; left: 0; right: 0; border-top: 2px solid #000; transform: translateY(-50%); z-index: 0; }
        .summary-table { width: 60%; border-collapse: collapse; margin-left: 0; margin-bottom: 40px; }
        .summary-table th, .summary-table td { border: none; padding: 6px; text-align: left; }
        .summary-table th:nth-child(1), .summary-table td:nth-child(1) { width: 40%; }
        .summary-table th:nth-child(2), .summary-table td:nth-child(2) { width: 30%; padding-left: 30px; }
        .summary-table th:nth-child(3), .summary-table td:nth-child(3) { width: 30%; padding-left: 30px; }
        .summary-table th { font-weight: bold; font-size: 12px; }
        .data-table { width: 100%; border-collapse: collapse; margin-bottom: 40px; table-layout: fixed; }
        .data-table th, .data-table td { border: none; padding: 6px; }
        .data-table tr.date-row td { border-bottom: 2px solid #000; }
        .data-table th:nth-child(1), .data-table td:nth-child(1) { width: 15%; text-align: left; }
        .data-table th:nth-child(2), .data-table td:nth-child(2) { width: 70%; text-align: left; }
        .data-table th:nth-child(3), .data-table td:nth-child(3) { width: 15%; text-align: right; }
        .balance-table { width: 100%; border-collapse: collapse; margin-bottom: 40px; table-layout: fixed; }
        .balance-table th, .balance-table td { border: none; padding: 6px; }
        .balance-table th:nth-child(1), .balance-table td:nth-child(1) { width: 50%; text-align: left; }
        .balance-table th:nth-child(2), .balance-table td:nth-child(2) { width: 50%; text-align: left; }
        .footnotes { margin-top: 40px; font-size: 10px; line-height: 1.5; }
        .important-info p { font-size: 12px; line-height: 1.5; margin: 10px 0; }
        hr.section-rule { border: 0; height: 2px; background: #000; margin: 15px 0; }
      </style>
    </head>
    <body>
      <table class="header-table">
        <tr>
          <td>
            {% if logo_path %}
            <img src="{{logo_path}}" alt="{{bank_name}} Logo" width="120"><br>
            {% endif %}
            {{bank_name}} Bank<br>
            PO Box 123456<br>
            City, State 12345
          </td>
          <td class="customer-service">
            <div class="date-range">{{statement_period}}</div>
            <div class="account-number">Account Number: {{account_number}}</div>
            <div class="cs-box">
              <div class="cs-header">Customer Service Information</div>
              <div class="cs-content">
                Web site: <span style="margin-left: 120px;">{{ bank_name | default('bank', true) | lower }}.com</span><br>
                Service Center: <span style="margin-left: 70px;">1-800-123-4567</span><br>
                Hearing Impaired: <span style="margin-left: 60px;">1-800-123-4568</span><br>
                Para Espanol: <span style="margin-left: 80px;">1-888-123-4567</span><br>
                International Calls: <span style="margin-left: 60px;">1-555-123-4567</span>
              </div>
            </div>
          </td>
        </tr>
      </table>
      <div>
        <strong>
          {{account_holder}}<br>
          {{account_holder_address}}
        </strong>
      </div>
      <div class="section-divider">
        <div class="section-header"><h2>Important Account Information</h2></div>
      </div>
      <div class="important-info">
        {{important_info}}
      </div>
      <div class="section-divider">
        <div style="text-align: center; margin: 0 auto; margin-bottom: -15px; max-width: 100%;">
          {{account_type}}
        </div>
        <div class="section-header"><h2>Account Summary</h2></div>
      </div>
      <table class="summary-table">
        <tr>
          <th></th>
          <th>Instances</th>
          <th>Amount</th>
        </tr>
        <tr>
          <td>Beginning Balance</td>
          <td>–</td>
          <td>{{summary.beginning_balance}}</td>
        </tr>
        <tr>
          <td>Deposits and Additions</td>
          <td>{{summary.deposits_count}}</td>
          <td>{{summary.deposits_total}}</td>
        </tr>
        <tr>
          <td>Withdrawals</td>
          <td>{{summary.withdrawals_count}}</td>
          <td>{{summary.withdrawals_total}}</td>
        </tr>
        <tr>
          <td>Ending Balance</td>
          <td>{{summary.transactions_count}}</td>
          <td>{{summary.ending_balance}}</td>
        </tr>
      </table>
      <p>
        {% if show_fee_waiver %}
        Your monthly service fee was waived due to meeting balance or deposit requirements.
        {% endif %}
      </p>
      <div class="section-divider">
        <div class="section-header"><h2>Deposits and Additions</h2></div>
      </div>
      <table class="data-table">
        <tr>
          <th>Date</th>
          <th>Description</th>
          <th>Amount</th>
        </tr>
        {% for deposit in deposits %}
        <tr class="date-row">
          <td>{{deposit.date}}</td>
          <td>{{deposit.description}}</td>
          <td>{{deposit.amount}}</td>
        </tr>
        {% endfor %}
        {% if not deposits %}
        <tr>
          <td colspan="3">No deposits for this period.</td>
        </tr>
        {% endif %}
        <tr>
          <td colspan="2"><strong>Total Deposits and Additions</strong></td>
          <td style="text-align: right;">{{summary.deposits_total}}</td>
        </tr>
      </table>
      <div class="section-divider">
        <div class="section-header"><h2>Withdrawals</h2></div>
      </div>
      <table class="data-table">
        <tr>
          <th>Date</th>
          <th>Description</th>
          <th>Amount</th>
        </tr>
        {% for withdrawal in withdrawals %}
        <tr class="date-row">
          <td>{{withdrawal.date}}</td>
          <td>{{withdrawal.description}}</td>
          <td>{{withdrawal.amount}}</td>
        </tr>
        {% endfor %}
        {% if not withdrawals %}
        <tr>
          <td colspan="3">No withdrawals for this period.</td>
        </tr>
        {% endif %}
        <tr>
          <td colspan="2"><strong>Total Withdrawals</strong></td>
          <td style="text-align: right;">{{summary.withdrawals_total}}</td>
        </tr>
      </table>
      <div class="section-divider">
        <div class="section-header"><h2>Daily Ending Balance</h2></div>
      </div>
      <table class="balance-table">
        <tr><th>Date</th><th class="num">Amount</th></tr>
        {% set ns = namespace(bal=summary.beginning_balance) %}
        {% for n in range((statement_end - statement_start).days + 1) %}
        {% set day = statement_start + n*day_delta %}
        {% if day.isoformat() in balance_map %}
        {% set ns.bal = balance_map[day.isoformat()] %}
        {% elif day.strftime("%Y-%m-%d") in balance_map %}
        {% set ns.bal = balance_map[day.strftime("%Y-%m-%d")] %}
        {% endif %}
        <tr>
          <td>{{day.strftime("%m/%d")}}</td>
          <td class="num">{{ns.bal}}</td>
        </tr>
        {% endfor %}
      </table>
      <div class="footnotes">
        <p style="font-size: 10px; font-weight: normal; line-height: 1.5; margin-bottom: 10px;">Disclosures</p>
        <p>All account transactions are subject to the {{bank_name}} Deposit Account Agreement, available at {{ bank_name | default('bank', true) | lower }}.com. For details on overdraft policies and fees, visit {{ bank_name | default('bank', true) | lower }}.com/overdraft or call 1-800-123-4567.</p>
        <p>{{bank_name}} Bank is a Member FDIC.</p>
      </div>
    </body>
    </html>
    """

# Cell 5: Save HTML template
def save_template(html_content: str, bank: str, image_name: str) -> str:
    """Write a template to TEMPLATES_DIR and return the path it was written to.

    Args:
        html_content: Template source to store (written as UTF-8).
        bank: Bank identifier, used as the file name prefix.
        image_name: Name of the source image, used as the file name suffix.

    Returns:
        Path of the written file: ``<TEMPLATES_DIR>/<bank>_template_<image_name>.html``.
    """
    file_name = f"{bank}_template_{image_name}.html"
    destination = os.path.join(TEMPLATES_DIR, file_name)
    with open(destination, mode='w', encoding='utf-8') as handle:
        handle.write(html_content)
    return destination

# Cell 6: Generate synthetic data
def _load_logo_data_url(bank: str) -> str:
    """Return the bank's logo as a base64 PNG data URL, or "" if no logo file exists.

    The file name configured in BANK_CONFIG is tried first, then the
    ``<bank>_logo.png`` name.
    """
    candidates = []
    configured = BANK_CONFIG.get(bank, {}).get("logo")
    if configured:
        candidates.append(configured)
    candidates.append(f"{bank}_logo.png")
    for file_name in candidates:
        logo_file = os.path.join(SAMPLE_LOGOS_DIR, file_name)
        if os.path.exists(logo_file):
            with open(logo_file, "rb") as fh:
                encoded = base64.b64encode(fh.read()).decode("utf-8")
            return f"data:image/png;base64,{encoded}"
    return ""


def _describe_transaction(account_type: str, is_deposit: bool) -> str:
    """Ask the ``mistral`` model for a one-line description; fall back to a fixed label on any failure."""
    fallback = "Payroll Deposit" if is_deposit else "Purchase"
    if is_deposit:
        kind, examples = "deposit", "'Payroll Deposit', 'Mobile Check Deposit', 'Refund'"
    else:
        kind, examples = "withdrawal", "'Grocery Purchase', 'ATM Withdrawal', 'Utility Payment'"
    # State the direction explicitly so a deposit is not labelled as a withdrawal.
    description_prompt = (
        f"Generate a realistic {kind} transaction description for a {account_type} bank account "
        f"(e.g., {examples}). Return only the description, no extra text."
    )
    try:
        response = ollama.generate(model="mistral", prompt=description_prompt)
        lines = response['response'].strip().splitlines()
    except Exception:
        # Best effort: the statement is still generated without the model.
        return fallback
    description = lines[0].strip().strip("\"'") if lines else ""
    return description or fallback


def generate_synthetic_data(bank: str, account_type: str = "personal") -> dict:
    """Generate random statement data for one account.

    Between 3 and 12 transactions are generated over the 30 days ending now.
    Deposits and withdrawals are returned in chronological order.

    Args:
        bank: Key into BANK_CONFIG.
        account_type: Key into the bank's "account_types" ("personal" by default).

    Returns:
        A dict with the keys account_holder, account_number, statement_period,
        account_type, account_holder_address, deposits, withdrawals, summary,
        important_info, logo_path, statement_start, statement_end,
        balance_map and show_fee_waiver.

    Raises:
        KeyError: if ``bank`` or ``account_type`` is not configured.
    """
    account_label = BANK_CONFIG[bank]["account_types"][account_type]

    num_transactions = random.randint(3, 12)
    total_deposits = 0.0
    total_withdrawals = 0.0
    beginning_balance = round(random.uniform(500, 5000), 2)
    current_balance = beginning_balance
    start_date = datetime.now() - timedelta(days=30)
    end_date = start_date + timedelta(days=30)

    dated_deposits = []      # (datetime, transaction dict)
    dated_withdrawals = []   # (datetime, transaction dict)
    signed_by_day = {}       # date -> signed amounts posted that day

    for _ in range(num_transactions):
        is_deposit = random.choice([True, False])
        amount = round(random.uniform(10, 1000 if is_deposit else 500), 2)
        date = start_date + timedelta(days=random.randint(1, 30))
        transaction = {
            "date": date.strftime("%m/%d"),
            "description": _describe_transaction(account_type, is_deposit),
            "amount": f"${amount:,.2f}" if is_deposit else f"-${amount:,.2f}"
        }
        if is_deposit:
            dated_deposits.append((date, transaction))
            total_deposits += amount
            current_balance += amount
        else:
            dated_withdrawals.append((date, transaction))
            total_withdrawals += amount
            current_balance -= amount
        signed_by_day.setdefault(date.date(), []).append(amount if is_deposit else -amount)

    # list.sort is stable, so same-day transactions keep their generation order.
    dated_deposits.sort(key=lambda pair: pair[0])
    dated_withdrawals.sort(key=lambda pair: pair[0])
    deposits = [transaction for _, transaction in dated_deposits]
    withdrawals = [transaction for _, transaction in dated_withdrawals]

    # One entry per day of the period, keyed by the day's full isoformat()
    # string (start_date carries a time component, so the keys do too).
    balance_map = {}
    balance = beginning_balance
    for n in range((end_date - start_date).days + 1):
        day = start_date + timedelta(days=n)
        for amount in signed_by_day.get(day.date(), ()):
            balance += amount
        balance_map[day.isoformat()] = f"${balance:,.2f}"

    important_info = f"""
    <p>Effective July 1, 2025, the monthly service fee for {account_label} accounts is $15 unless minimum balance or deposit requirements are met.</p>
    <p>Visit {bank.lower()}.com for account details or contact Customer Service at 1-800-123-4567.</p>
    """

    return {
        "account_holder": fake.name().upper(),
        "account_number": fake.bban()[:15],
        "statement_period": f"{start_date.strftime('%B %d')} - {end_date.strftime('%B %d, %Y')}",
        "account_type": account_label,
        "account_holder_address": fake.address().replace('\n', '<br>'),
        "deposits": deposits,
        "withdrawals": withdrawals,
        "summary": {
            "beginning_balance": f"${beginning_balance:,.2f}",
            "deposits_count": len(deposits),
            "deposits_total": f"${total_deposits:,.2f}",
            "withdrawals_count": len(withdrawals),
            "withdrawals_total": f"${total_withdrawals:,.2f}",
            "transactions_count": len(deposits) + len(withdrawals),
            "ending_balance": f"${current_balance:,.2f}"
        },
        "important_info": important_info,
        "logo_path": _load_logo_data_url(bank),
        "statement_start": start_date,
        "statement_end": end_date,
        "balance_map": balance_map,
        "show_fee_waiver": current_balance >= 1500
    }

# Cell 7: Populate template with synthetic data
def _find_wkhtmltopdf() -> str:
    """Locate the wkhtmltopdf executable.

    Order: the WKHTMLTOPDF_PATH environment variable, then the system PATH,
    then the default Windows install location.
    """
    return (
        os.environ.get("WKHTMLTOPDF_PATH")
        or shutil.which("wkhtmltopdf")
        or "C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe"
    )


def populate_template(template_filename: str, custom_data: dict, output_dir: str, bank: str, image_name: str) -> tuple:
    """Render a saved template to HTML and, when possible, to PDF.

    Args:
        template_filename: Path of the template; only its base name is used,
            and it is loaded from TEMPLATES_DIR.
        custom_data: Template variables. When empty or None, synthetic data
            for ``bank`` is generated instead.
        output_dir: Directory that receives the ``HTML`` and ``PDF`` subfolders.
        bank: Bank identifier; used in file names and offered to the template
            as ``bank_name`` unless ``custom_data`` already defines it.
        image_name: Source image name, used in the output file names.

    Returns:
        ``(html_filename, pdf_filename)``. The PDF path is returned even when
        PDF generation failed; the failure is printed, not raised.
    """
    html_dir = os.path.join(output_dir, "HTML")
    pdf_dir = os.path.join(output_dir, "PDF")
    os.makedirs(html_dir, exist_ok=True)
    os.makedirs(pdf_dir, exist_ok=True)

    env = Environment(loader=FileSystemLoader(TEMPLATES_DIR))
    # Templates step through the statement period with statement_start + n*day_delta.
    env.globals.update(day_delta=timedelta(days=1))
    template = env.get_template(os.path.basename(template_filename))

    custom_data = custom_data or generate_synthetic_data(bank)
    # The templates reference bank_name; supply it without mutating the
    # caller's dict and without overriding a value the caller provided.
    context = {"bank_name": bank, **custom_data}

    account_holder = str(context.get("account_holder", ""))
    safe_account_holder = ''.join(c for c in account_holder if c.isalnum() or c in (' ', '_')).replace(' ', '_')
    html_filename = os.path.join(html_dir, f"statement_{bank}_{safe_account_holder}_{image_name}.html")
    pdf_filename = os.path.join(pdf_dir, f"statement_{bank}_{safe_account_holder}_{image_name}.pdf")

    rendered_html = template.render(**context)
    with open(html_filename, 'w', encoding='utf-8') as f:
        f.write(rendered_html)

    try:
        # Inside the try: a missing wkhtmltopdf binary must not discard the
        # HTML that was just written.
        config = pdfkit.configuration(wkhtmltopdf=_find_wkhtmltopdf())
        pdfkit.from_string(rendered_html, pdf_filename, configuration=config, options={
            "enable-local-file-access": "",
            "page-size": "Letter",
            "margin-top": "0.8in",
            "margin-right": "0.9in",
            "margin-bottom": "0.8in",
            "margin-left": "0.9in",
            "encoding": "UTF-8"
        })
    except OSError as e:
        print(f"PDF generation failed for {bank}: {e}")

    return html_filename, pdf_filename

# Cell 8: Monitor input_images directory and process existing files
def _resolve_bank_key(name):
    """Map a bank name to a BANK_CONFIG key, or return None when nothing matches.

    The name is lower-cased and stripped of non-alphanumeric characters, so
    "Wells Fargo" resolves to "wellsfargo". If there is no exact match, the
    first configured key (other than "unknown") contained in the normalised
    name is used, e.g. "PNC Bank" -> "pnc".
    """
    if not isinstance(name, str):
        return None
    key = "".join(ch for ch in name.lower() if ch.isalnum())
    if key in BANK_CONFIG:
        return key
    for candidate in BANK_CONFIG:
        if candidate != "unknown" and candidate in key:
            return candidate
    return None


def process_image(image_path: str, bank: str, custom_data: dict = None):
    """Run the full pipeline for one statement image.

    Extracts the template structure, generates and saves the HTML template,
    then renders it to HTML/PDF under OUTPUT_DIR. Any exception is printed
    rather than raised.

    Args:
        image_path: Path of the statement image.
        bank: Bank to assume when the extracted structure names none.
        custom_data: Template variables passed on to populate_template;
            None lets populate_template generate synthetic data.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    print(f"Processing image: {image_path}")
    try:
        template_structure = extract_template_structure(image_path)
        reported_bank = template_structure.get("bank_name") if isinstance(template_structure, dict) else None
        # A missing, null or empty bank_name falls back to the caller's bank.
        bank_name = reported_bank or bank
        detected_bank = _resolve_bank_key(bank_name)
        if detected_bank is None:
            print(f"Detected bank {bank_name} not in BANK_CONFIG, using fallback: unknown")
            detected_bank = "unknown"
        html_content = generate_html_template(template_structure, detected_bank)
        template_filename = save_template(html_content, detected_bank, image_name)
        html_file, pdf_file = populate_template(template_filename, custom_data, OUTPUT_DIR, detected_bank, image_name)
        print(f"Generated HTML: {html_file}")
        print(f"Generated PDF: {pdf_file}")
    except Exception as e:
        print(f"Error processing {image_path}: {e}")

class ImageHandler(FileSystemEventHandler):
    """Watchdog handler that processes every newly created statement image."""

    # Extensions treated as images; compared case-insensitively.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, bank: str, custom_data: dict = None):
        """Store the arguments forwarded to process_image for each new image."""
        super().__init__()
        self.bank = bank
        self.custom_data = custom_data

    def on_created(self, event):
        """Process a created file if its extension is one of IMAGE_EXTENSIONS; ignore directories."""
        if event.is_directory:
            return
        # fsdecode accepts both str and bytes paths; lower() makes
        # "Capture.PNG" match as well as "capture.png".
        src_path = os.fsdecode(event.src_path)
        if src_path.lower().endswith(self.IMAGE_EXTENSIONS):
            # Short pause before reading — presumably to let the writer
            # finish the file; confirm the delay is sufficient.
            time.sleep(1)
            process_image(src_path, self.bank, self.custom_data)

def monitor_directory(bank: str, custom_data: dict = None):
    """Process the images already in INPUT_IMAGES_DIR, then watch it for new ones.

    Blocks until interrupted with Ctrl+C (KeyboardInterrupt). The observer is
    stopped and joined however the wait loop ends.

    Args:
        bank: Bank identifier forwarded to process_image.
        custom_data: Template variables forwarded to process_image.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # Case-insensitive extension match, so "Capture.PNG" is picked up on
    # case-sensitive file systems too. Dot-files are skipped.
    image_files = sorted(
        os.path.join(INPUT_IMAGES_DIR, name)
        for name in os.listdir(INPUT_IMAGES_DIR)
        if not name.startswith('.')
        and name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(INPUT_IMAGES_DIR, name))
    )
    if not image_files:
        print(f"No images found in {INPUT_IMAGES_DIR}")
    for image_path in image_files:
        process_image(image_path, bank, custom_data)

    event_handler = ImageHandler(bank, custom_data)
    observer = Observer()
    observer.schedule(event_handler, INPUT_IMAGES_DIR, recursive=False)
    observer.start()
    print(f"Monitoring {INPUT_IMAGES_DIR} for new images...")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Normal way to end monitoring; fall through to the cleanup below.
        pass
    finally:
        # Also runs when an unexpected exception escapes the loop, so the
        # observer is never left running.
        observer.stop()
        observer.join()


Logo file not found for unknown: sample_logos\generic_bank_logo.png, continuing without validation
Processing image: input_images\Capture.PNG
Raw Gemma 3 response for input_images\Capture.PNG: ```json
{
  "bank_name": "PNC",
  "fields": [
    {
      "variable_name": "account_holder",
      "description": "Name of the account holder"
    },
    {
      "variable_name": "account_number",
      "description": "Account number"
    },
    {
      "variable_name": "statement_period",
      "description": "Statement period"
    },
    {
      "variable_name": "account_type",
      "description": "Type of account"
    },
    {
      "variable_name": "account_holder_address",
      "description": "Account holder's address"
    },
    {
      "variable_name": "deposits",
      "description": "List of deposits"
    },
    {
      "variable_name": "withdrawals",
      "description": "List of withdrawals"
    },
    {
      "variable_name": "summary",
      "description": "Summary of account activ

In [None]:

# Cell 9: Main
if __name__ == "__main__":
    bank = "unknown"  # Default for testing

    statement_start = datetime(2025, 7, 1)
    statement_end = datetime(2025, 7, 31)

    # Daily ending balances for the sample statement. The template looks up
    # balance_map with (statement_start + n*day_delta).isoformat(), i.e. a
    # full datetime string such as "2025-07-01T00:00:00", so the keys are
    # built the same way and one entry is written for every day.
    balance_after_day = {5: 4500.00, 10: 4425.00, 15: 4575.00, 20: 4375.00}  # day of month -> new balance
    running_balance = 1500.00
    balance_map = {}
    for offset in range((statement_end - statement_start).days + 1):
        day = statement_start + timedelta(days=offset)
        running_balance = balance_after_day.get(day.day, running_balance)
        balance_map[day.isoformat()] = f"${running_balance:.2f}"

    custom_data = {
        "account_holder": "John Doe",
        "account_number": "1234567890",
        "statement_period": "July 01 - July 31, 2025",
        "account_type": "Personal Checking",
        "account_holder_address": "123 Main St<br>Anytown, USA 12345",
        "deposits": [
            {"date": "07/05", "description": "Payroll Deposit", "amount": "$3000.00"},
            {"date": "07/15", "description": "Refund", "amount": "$150.00"}
        ],
        "withdrawals": [
            {"date": "07/10", "description": "Online Purchase", "amount": "-$75.00"},
            {"date": "07/20", "description": "ATM Withdrawal", "amount": "-$200.00"}
        ],
        "summary": {
            "beginning_balance": "$1500.00",
            "deposits_count": 2,
            "deposits_total": "$3150.00",
            "withdrawals_count": 2,
            "withdrawals_total": "$275.00",
            "transactions_count": 4,
            "ending_balance": "$4375.00"
        },
        "important_info": """
        <p>Effective July 1, 2025, the monthly service fee for Personal Checking accounts is $15 unless minimum balance or deposit requirements are met.</p>
        <p>Visit bank.com for account details or contact Customer Service at <b>1-800-123-4567</b>.</p>
        """,
        "logo_path": "",
        "statement_start": statement_start,
        "statement_end": statement_end,
        "balance_map": balance_map,
        "show_fee_waiver": True
    }
    # Blocks until interrupted: processes existing images, then watches for new ones.
    monitor_directory(bank, custom_data)