In [1]:
# Cell 1: Import libraries for data handling, random data, and PDF creation
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta
import json
import ollama
from pydantic import BaseModel, Field
from typing import List
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
import os

# Initialize Faker for fake data
fake = Faker()


In [2]:
# Cell 2: Create directory


In [3]:

# Cell 3: Pydantic structured output for a transaction
class Transaction(BaseModel):
    description: str = Field(..., max_length=35, description="Transaction description, max 35 characters")
    category: str
    amount: float


In [4]:
# Cell 4: Generate category lists using LLM
def generate_category_lists() -> tuple[List[str], List[str]]:
    prompt = """
    Generate two lists of bank transaction categories in JSON format. One list for reasons someone loses money (e.g., utilities, subscriptions) and one for reasons someone gains money (e.g., gifts, deposits). Each list should have 5-7 unique categories, each 1-2 words, title case, no punctuation. Return as:
    {
      "loss_categories": ["Category One", "Category Two", ...],
      "gain_categories": ["Category One", "Category Two", ...]
    }
    """
    try:
        response = ollama.generate(model="mistral:7b-instruct-v0.3-q4_0", prompt=prompt)
        category_data = json.loads(response['response'].strip())
        loss_categories = category_data.get("loss_categories", [])
        gain_categories = category_data.get("gain_categories", [])
        # Validate categories
        loss_categories = [cat for cat in loss_categories if isinstance(cat, str) and 1 <= len(cat.split()) <= 2]
        gain_categories = [cat for cat in gain_categories if isinstance(cat, str) and 1 <= len(cat.split()) <= 2]
        if len(loss_categories) < 5 or len(gain_categories) < 5:
            raise ValueError("Insufficient valid categories")
    except (json.JSONDecodeError, ValueError):
        # Fallback categories if LLM fails or returns invalid data
        loss_categories = ["Utility Payment", "Subscription Fee", "Online Purchase", "Rent Payment", "Grocery Shopping", "Insurance Bill"]
        gain_categories = ["Salary Deposit", "Tax Refund", "Gift Received", "Client Payment", "Investment Return", "Cash Deposit"]
    return loss_categories, gain_categories

# Cell 5: Generate transaction description using LLM
def generate_transaction_description(amount: float, category: str) -> dict:
    prompt = f"""
    Generate a bank transaction description (3-5 words, max 25 characters) for a transaction in the '{category}' category.
    Rules:
    - Use title case (e.g., 'Grocery Store Purchase').
    - No punctuation (commas, periods, etc.).
    - No parentheses, dashes, or dollar signs.
    - No amounts or numbers as words.
    - No typos.
    - Use simple, clear phrases.
    - Examples: 'Grocery Store Purchase', 'Utility Bill Payment', 'Salary Direct Deposit'
    """
    try:
        response = ollama.generate(model="mistral:7b-instruct-v0.3-q4_0", prompt=prompt)
        description = response['response'].strip()[:25]
    except:
        description = f"{category} Transaction"  # Fallback if LLM fails

    # Clean description
    description = description.replace("(", "").replace(")", "").replace(",", "").replace(":", "").replace("-", "").replace("$", "").replace(".", "")
    description = ' '.join(word.capitalize() for word in description.split())[:25]

    # Validate word count
    words = description.split()
    if len(words) < 3 or len(words) > 5:
        description = f"{category} Transaction"[:25]  # Fallback to generic description

    transaction = Transaction(
        description=description,
        category=category,
        amount=amount
    )
    return transaction.dict()

In [5]:

# Cell 6: Generate the bank statement
def generate_bank_statement(num_transactions: int = 10, account_holder: str = "John Doe") -> pd.DataFrame:
    # Get category lists
    loss_categories, gain_categories = generate_category_lists()

    # Generate random dates within the last 90 days
    start_date = datetime.now() - timedelta(days=90)
    dates = [start_date + timedelta(days=random.randint(0, 90)) for _ in range(num_transactions)]

    # Create transactions
    transactions = []
    for _ in range(num_transactions):
        is_gain = random.choice([True, False])
        category = random.choice(gain_categories if is_gain else loss_categories)
        amount = round(random.uniform(50, 1000), 2) if is_gain else round(random.uniform(-500, -10), 2)
        transaction = generate_transaction_description(amount, category)
        transactions.append(transaction)

    # Create DataFrame
    data = {
        "Date": [d.strftime("%Y-%m-%d") for d in dates],
        "Description": [t["description"] for t in transactions],
        "Category": [t["category"] for t in transactions],
        "Amount": [t["amount"] for t in transactions],
        "Balance": [0.0] * num_transactions,
        "Account Holder": [account_holder] * num_transactions,
        "Transaction ID": [fake.bban() for _ in range(num_transactions)]
    }
    df = pd.DataFrame(data)

    # Sort by date and calculate running balance
    df = df.sort_values("Date")
    initial_balance = 1000.0
    df["Balance"] = initial_balance + df["Amount"].cumsum()

    return df


In [6]:
# Cell 7: Create PDF from the data (Updated to match the example bank statement)
def generate_pdf(df: pd.DataFrame, account_holder: str, output_filename: str) -> str:
    doc = SimpleDocTemplate(
        output_filename,
        pagesize=letter,
        rightMargin=0.5*inch,
        leftMargin=0.5*inch,
        topMargin=0.75*inch,
        bottomMargin=0.5*inch
    )
    elements = []

    styles = getSampleStyleSheet()
    
    # Custom styles for different sections
    bank_style = styles['Normal']
    bank_style.fontSize = 12
    bank_style.fontName = 'Helvetica-Bold'
    
    address_style = styles['Normal']
    address_style.fontSize = 10
    
    title_style = styles['Title']
    title_style.alignment = 1  # Center alignment
    
    detail_style = styles['Normal']
    detail_style.fontSize = 10
    detail_style.leading = 12
    
    # Header Section: Bank Name and Address
    bank_name = Paragraph("FIRST CITIZENS BANK", bank_style)
    bank_address = Paragraph("231 Valley Farms Street<br/>Santa Monica, CA 90403<br/>firstcitizensbank@domain.com", address_style)
    statement_title = Paragraph("STATEMENT OF ACCOUNT", title_style)
    
    # Account Details and Summary (positioned on the right)
    account_number = Paragraph(f"Account Number: 111-234-567-890", detail_style)
    statement_date = Paragraph(f"Statement Date: {datetime.now().strftime('%m/%d/%Y')}", detail_style)
    period_covered = Paragraph(f"Period Covered: {min(df['Date']).replace('-', '/')} to {max(df['Date']).replace('-', '/')} ", detail_style)
    page_number = Paragraph("Page 1 of 1", detail_style)
    
    # Customer Information (positioned on the left)
    customer_info = Paragraph(f"{account_holder}<br/>2450 Courage St, STE 108<br/>Brownsville, TX 78521", detail_style)
    
    # Summary Section
    initial_balance = 1000.0  # From the generate_bank_statement function
    opening_balance = initial_balance
    total_credits = df[df['Amount'] > 0]['Amount'].sum()
    total_debits = abs(df[df['Amount'] < 0]['Amount'].sum())
    closing_balance = df['Balance'].iloc[-1]
    num_transactions = len(df)
    
    summary = [
        Paragraph(f"Opening Balance: {opening_balance:,.2f}", detail_style),
        Paragraph(f"Total Credit Amount: {total_credits:,.2f}", detail_style),
        Paragraph(f"Total Debit Amount: {total_debits:,.2f}", detail_style),
        Paragraph(f"Closing Balance: {closing_balance:,.2f}", detail_style),
        Paragraph("Account Type: Current Account", detail_style),
        Paragraph(f"Number of Transactions: {num_transactions}", detail_style),
    ]
    
    # Transactions Table Preparation
    # Convert Amount into Credit and Debit columns
    df = df.copy()
    df['Credit'] = df['Amount'].apply(lambda x: f"{x:,.2f}" if x > 0 else "")
    df['Debit'] = df['Amount'].apply(lambda x: f"{abs(x):,.2f}" if x < 0 else "")
    df['Balance'] = df['Balance'].apply(lambda x: f"{x:,.2f}")
    df['Date'] = df['Date'].apply(lambda x: x.replace('-', '/'))
    
    # Select columns for the table
    table_data = [["Date", "Description", "Credit", "Debit", "Balance"]]
    for _, row in df.iterrows():
        table_data.append([row['Date'], row['Description'], row['Credit'], row['Debit'], row['Balance']])
    
    # Table styling
    page_width = letter[0] - (doc.leftMargin + doc.rightMargin)
    col_widths = [
        0.15 * page_width,  # Date
        0.35 * page_width,  # Description
        0.15 * page_width,  # Credit
        0.15 * page_width,  # Debit
        0.20 * page_width   # Balance
    ]
    
    table = Table(table_data, colWidths=col_widths, splitByRow=True)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
        ('ALIGN', (1, 1), (1, -1), 'LEFT'),
        ('ALIGN', (2, 1), (-1, -1), 'RIGHT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 6),
        ('TOPPADDING', (0, 1), (-1, -1), 4),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
    ]))
    
    # Assemble the layout
    elements.append(bank_name)
    elements.append(bank_address)
    elements.append(Spacer(1, 0.1*inch))
    
    # Use a table to position customer info and summary side by side
    info_table_data = [
        [customer_info, statement_title],
        ["", account_number],
        ["", statement_date],
        ["", period_covered],
        ["", page_number]
    ]
    info_table = Table(info_table_data, colWidths=[2.5*inch, 4.5*inch])
    info_table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
    ]))
    elements.append(info_table)
    elements.append(Spacer(1, 0.25*inch))
    
    # Summary Section
    for line in summary:
        elements.append(line)
    elements.append(Spacer(1, 0.25*inch))
    
    # Transactions Section
    elements.append(Paragraph("Transactions", styles['Heading2']))
    elements.append(Spacer(1, 0.1*inch))
    elements.append(table)
    
    # Build the document
    doc.build(elements)
    return output_filename

In [8]:

# Cell 8: Generate and Save Statement
num_transactions = 12
account_holder = "John Doe"

statement = generate_bank_statement(num_transactions, account_holder)
print("Bank Statement Preview:")
print(statement.head(5))

csv_filename = f"testcopyfolder/bank_statement_{account_holder.replace(' ', '_')}.csv"
statement.to_csv(csv_filename, index=False)
print(f"CSV saved as: {csv_filename}")

pdf_filename = f"testcopyfolder/bank_statement_{account_holder.replace(' ', '_')}.pdf"
generate_pdf(statement, account_holder, pdf_filename)
print(f"PDF saved as: {pdf_filename}")

C:\Users\cha\AppData\Local\Temp\ipykernel_14888\916404054.py:59: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return transaction.dict()


Bank Statement Preview:
         Date               Description           Category  Amount  Balance  \
6  2025-03-10   Groceries Market Purcha          GROCERIES -226.66   773.34   
8  2025-03-31    Birthday Gift Purchase              GIFTS  333.74  1107.08   
4  2025-04-13        Water Bill Payment          UTILITIES -237.73   869.35   
3  2025-04-16    Birthday Gift Purchase              GIFTS  983.54  1852.89   
7  2025-04-16  Investment Income Earned  INVESTMENT_INCOME  557.52  2410.41   

  Account Holder      Transaction ID  
6       John Doe  FPBC54254224852031  
8       John Doe  EOYW81244235413543  
4       John Doe  KCSC76413457967967  
3       John Doe  NDVQ08514645503271  
7       John Doe  ZAEA12057113075624  
CSV saved as: testcopyfolder/bank_statement_John_Doe.csv
PDF saved as: testcopyfolder/bank_statement_John_Doe.pdf
