In [76]:
# Cell 1: Import libraries for data handling, random data, and PDF creation

import pandas as pd  # For creating and managing data tables
import numpy as np  # For numerical operations

from faker import Faker  # For generating fake names and data
import random
from datetime import datetime, timedelta

import json
import ollama  # For generating transaction descriptions
from pydantic import BaseModel, Field  # For structuring and validating data

from typing import Literal  # For specific category values

from reportlab.lib import colors  # For PDF table styling
from reportlab.lib.pagesizes import letter  # For PDF page size
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch

import os

# Initialize Faker for fake data
fake = Faker()

In [77]:
# Cell 2: Create a folder called to store output files

if not os.path.exists("the_statements"):
    os.makedirs("the_statements")

In [78]:
# Cell 3: Pydantic structured output for a transaction

class Transaction(BaseModel):
    description: str = Field(..., max_length=50, description="Transaction description, max 25 characters")
    category: Literal["Groceries", "Bills", "Deposit"]  # Must be one of these categories
    amount: float  # Transaction amount (positive for deposits, negative for others)

In [83]:
# Cell 4: Generate transaction description 

# Function to create a simple, valid transaction description
def generate_transaction_description(amount, transaction_type):
    # Ensure correct category name (fix any "Bils" to "Bills")
    transaction_type = "Bills" if transaction_type == "Bils" else transaction_type
    
    # Fallback descriptions if Ollama fails
    fallback_descriptions = {
        "Deposit": ["Salary Deposit", "Tax Refund", "Client Payment", "Bank Transfer"],
        "Groceries": ["Grocery Purchase", "Food Shopping", "Supermarket"],
        "Bills": ["Utility Bill", "Rent Payment", "Internet Bill", "Electric Bill"]
    }
    
    # Create a clear, strict prompt for Ollama
    if amount > 0:
        prompt = f"""
        Generate a bank transaction description (max 25 characters) for a deposit.
        Rules:
        - Use title case (e.g., 'Salary Deposit').
        - Keep it 2-3 words, clear, and simple.
        - No numbers, dollar signs, or special characters.
        - No typos (e.g., 'Bils', 'Saisry').
        - Examples: 'Salary Deposit', 'Tax Refund', 'Client Payment'
        """
    else:
        prompt = f"""
        Generate a bank transaction description (max 25 characters) for a {transaction_type} withdrawal.
        Rules:
        - Use title case (e.g., 'Grocery Purchase').
        - Keep it 2-3 words, clear, and simple.
        - No numbers, dollar signs, or special characters.
        - No typos (e.g., 'Bils', 'Saisry').
        - Examples: 'Grocery Purchase', 'Utility Bill', 'Rent Payment'
        """
    
    # Try to get a description from Ollama
    try:
        response = ollama.generate(model="mistral:7b-instruct-v0.3-q4_0", prompt=prompt)
        description = response['response'].strip()
        
        # Check if the description is valid (within 25 characters, no special characters, no typos)
        if len(description) <= 25 and description.isalpha() or " " in description:
            # Ensure title case and no invalid words like 'Bils'
            description = description.title().replace("Bils", "Bills")
        else:
            # Use a random fallback description if invalid
            description = random.choice(fallback_descriptions[transaction_type])
    except:
        # If Ollama fails, use a fallback description
        description = random.choice(fallback_descriptions[transaction_type])
    
    # Return a dictionary with transaction details
    return {
        "description": description,
        "category": transaction_type,
        "amount": amount
    }

In [84]:
# Cell 5: Generate Data

# Function to create a fake bank statement
def generate_bank_statement(num_transactions=10, account_holder="John Doe"):
    # List of possible transaction types
    transaction_types = ["Groceries", "Bills", "Deposit"]
    
    # Create a list of random dates from the last 90 days
    start_date = datetime.now() - timedelta(days=90)
    dates = [start_date + timedelta(days=random.randint(0, 90)) for _ in range(num_transactions)]
    
    # Create a list to store transactions
    transactions = []
    for _ in range(num_transactions):
        # Pick a random transaction type
        transaction_type = random.choice(transaction_types)
        # Set amount: deposits are positive, others are negative
        if transaction_type == "Deposit":
            amount = round(random.uniform(50, 1000), 2)  # $50 to $1000
        else:
            amount = round(random.uniform(-500, -10), 2)  # -$500 to -$10
        # Get transaction description
        transaction = generate_transaction_description(amount, transaction_type)
        transactions.append(transaction)
    
    # Create a table with all transaction data
    data = {
        "Date": [d.strftime("%Y-%m-%d") for d in dates],  # Format dates as YYYY-MM-DD
        "Description": [t["description"] for t in transactions],
        "Category": [t["category"] for t in transactions],
        "Amount": [t["amount"] for t in transactions],
        "Balance": [0] * num_transactions,  # Placeholder for balance
        "Account Holder": [account_holder] * num_transactions,
        "Transaction ID": [fake.bban() for _ in range(num_transactions)]  # Random ID
    }
    df = pd.DataFrame(data)  # Turn data into a table
    
    # Sort transactions by date
    df = df.sort_values("Date")
    
    # Calculate running balance starting from $1000
    initial_balance = 1000
    df["Balance"] = initial_balance + df["Amount"].cumsum()  # Add amounts step-by-step
    
    return df

In [85]:
# Cell 6: Create PDF from the data and format the PDF

# Function to create a PDF from the bank statement with enhanced formatting
def generate_pdf(df, account_holder, output_filename):
    # Set up document with letter size and adjusted margins
    doc = SimpleDocTemplate(
        output_filename,
        pagesize=letter,  # 612x792 points (8.5x11 inches)
        rightMargin=0.5*inch,
        leftMargin=0.5*inch,
        topMargin=0.75*inch,
        bottomMargin=0.5*inch
    )
    elements = []

    # Styles for title and text
    styles = getSampleStyleSheet()
    title = Paragraph(f"Bank Statement for {account_holder}", styles['Title'])
    elements.append(title)
    elements.append(Spacer(1, 0.25*inch))

    # Prepare cell style for wrapping text
    cell_style = styles['Normal']
    cell_style.fontSize = 7  # Smaller font for better fit
    cell_style.leading = 9  # Line spacing
    cell_style.wordWrap = 'CJK'  # Enable text wrapping

    # Format Amount and Balance to 2 decimal places
    df = df.copy()  # Avoid modifying the original DataFrame
    df['Amount'] = df['Amount'].apply(lambda x: f"{x:.2f}")
    df['Balance'] = df['Balance'].apply(lambda x: f"{x:.2f}")

    # Convert DataFrame to table data, wrapping all cells in Paragraphs
    data = [df.columns.tolist()]  # Header row
    for _, row in df.iterrows():
        row_data = [Paragraph(str(cell), cell_style) for cell in row]
        data.append(row_data)

    # Calculate available width (page width minus margins)
    page_width = letter[0] - (doc.leftMargin + doc.rightMargin)  # 612 - (36 + 36) = 540 points
    # Adjusted column widths to prevent cutoff
    col_widths = [
        0.12 * page_width,  # Date (12%)
        0.30 * page_width,  # Description (30%)
        0.12 * page_width,  # Category (12%)
        0.10 * page_width,  # Amount (10%)
        0.10 * page_width,  # Balance (10%)
        0.13 * page_width,  # Account Holder (14%)
        0.13 * page_width   # Transaction Code (12%)
    ]

    # Create table
    table = Table(data, colWidths=col_widths, splitByRow=True)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),  # Header background
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),  # Header text
        ('ALIGN', (0, 0), (-1, 0), 'CENTER'),  # Center headers
        ('ALIGN', (1, 1), (1, -1), 'LEFT'),  # Left-align Description column
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),  # Bold headers
        ('FONTSIZE', (0, 0), (-1, 0), 8),  # Header font size
        ('BOTTOMPADDING', (0, 0), (-1, 0), 6),  # Header padding
        ('TOPPADDING', (0, 1), (-1, -1), 4),  # Data padding
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),  # Data background
        ('GRID', (0, 0), (-1, -1), 0.25, colors.black),  # Thinner grid lines
        ('FONTSIZE', (0, 1), (-1, -1), 7),  # Data font size
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),  # Vertical alignment
    ]))

    elements.append(table)

    # Build PDF
    doc.build(elements)
    return output_filename

In [86]:
# Cell 7: Generate and Save Statement

# Set up parameters
num_transactions = 20
account_holder = "John Doe"

# Generate the bank statement
statement = generate_bank_statement(num_transactions, account_holder)

# Show a preview
print("Bank Statement Preview:")
print(statement.head(5))

# Save as CSV
csv_filename = f"the_statements/bank_statement_{account_holder.replace(' ', '_')}.csv"
statement.to_csv(csv_filename, index=False)
print(f"CSV saved as: {csv_filename}")

# Save as PDF
pdf_filename = f"the_statements/bank_statement_{account_holder.replace(' ', '_')}.pdf"
generate_pdf(statement, account_holder, pdf_filename)
print(f"PDF saved as: {pdf_filename}")

Bank Statement Preview:
          Date                          Description   Category  Amount  \
12  2025-03-10                 Groceries Withdrawal  Groceries -322.54   
10  2025-03-15                 Fund Transfer Inflow    Deposit  418.10   
5   2025-03-24                     Bills Withdrawal      Bills  -36.26   
16  2025-03-26                   Grocery Withdrawal  Groceries  -54.10   
0   2025-03-27  Fund Transfer In / Deposit Received    Deposit  497.38   

    Balance Account Holder      Transaction ID  
12   677.46       John Doe  DLBQ31492960724165  
10  1095.56       John Doe  CVZM85982327560238  
5   1059.30       John Doe  PZGW62267657347876  
16  1005.20       John Doe  WDNX29905952620858  
0   1502.58       John Doe  QHKP71939255349920  
CSV saved as: the_statements/bank_statement_John_Doe.csv
PDF saved as: the_statements/bank_statement_John_Doe.pdf
