- reportlab: For PDF generation
- faker: For generating realistic fake customer data
- pandas: For handling data structures and tables

In [None]:
import pandas as pd
from faker import Faker
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.units import inch
import random
from datetime import datetime, timedelta
from azure.identity.aio import DefaultAzureCredential
from azure.storage.blob.aio import BlobServiceClient
import os

In [3]:
# Shared Faker instance used by the generator functions in this notebook
fake = Faker()

# Vintage Magic: The Gathering card catalog.
# Each row is (name, set, unit price, condition); the comprehension expands
# every row into a dict keyed by "name", "set", "price" and "condition".
vintage_mtg_cards = [
    dict(zip(("name", "set", "price", "condition"), row))
    for row in (
        ("Black Lotus", "Alpha", 45000.00, "Near Mint"),
        ("Ancestral Recall", "Alpha", 8500.00, "Lightly Played"),
        ("Time Walk", "Beta", 4200.00, "Near Mint"),
        ("Mox Sapphire", "Alpha", 7800.00, "Moderately Played"),
        ("Mox Ruby", "Beta", 6500.00, "Near Mint"),
        ("Mox Pearl", "Alpha", 5200.00, "Lightly Played"),
        ("Mox Emerald", "Unlimited", 4800.00, "Near Mint"),
        ("Mox Jet", "Alpha", 5800.00, "Moderately Played"),
        ("Time Twister", "Beta", 3200.00, "Near Mint"),
        ("Underground Sea", "Revised", 850.00, "Near Mint"),
        ("Tropical Island", "Revised", 750.00, "Lightly Played"),
        ("Volcanic Island", "Revised", 950.00, "Near Mint"),
        ("Tundra", "Revised", 650.00, "Near Mint"),
        ("Bayou", "Revised", 720.00, "Lightly Played"),
        ("Savannah", "Revised", 580.00, "Near Mint"),
        ("Scrubland", "Revised", 520.00, "Moderately Played"),
        ("Badlands", "Revised", 480.00, "Near Mint"),
        ("Plateau", "Revised", 420.00, "Lightly Played"),
        ("Taiga", "Revised", 380.00, "Near Mint"),
        ("Force of Will", "Alliances", 85.00, "Near Mint"),
        ("Wasteland", "Tempest", 45.00, "Near Mint"),
        ("Gaea's Cradle", "Urza's Saga", 920.00, "Near Mint"),
        ("Serra's Sanctum", "Urza's Saga", 380.00, "Lightly Played"),
        ("Tolarian Academy", "Urza's Saga", 95.00, "Near Mint"),
        ("Library of Alexandria", "Arabian Nights", 1850.00, "Moderately Played"),
        ("Bazaar of Baghdad", "Arabian Nights", 1200.00, "Near Mint"),
        ("The Tabernacle at Pendrell Vale", "Legends", 3200.00, "Lightly Played"),
        ("Moat", "Legends", 650.00, "Near Mint"),
        ("The Abyss", "Legends", 750.00, "Moderately Played"),
        ("Chains of Mephistopheles", "Legends", 480.00, "Near Mint"),
    )
]

print(f"Created database with {len(vintage_mtg_cards)} vintage MTG cards")

Created database with 30 vintage MTG cards


In [5]:
# Generate fake customer data
def generate_customer_data():
    """Build one fake customer record with billing and shipping addresses.

    Returns:
        dict: keys "name", "email", "phone", "billing_address" and
        "shipping_address". Each address is a dict with "street", "city",
        "state", "zip_code" and "country" (always "USA").
    """

    def _random_address():
        # One Faker-generated address in the shape used for both address slots
        return {
            "street": fake.street_address(),
            "city": fake.city(),
            "state": fake.state(),
            "zip_code": fake.zipcode(),
            "country": "USA",
        }

    customer = {
        "name": fake.name(),
        "email": fake.email(),
        "phone": fake.phone_number(),
        "billing_address": _random_address(),
    }

    # With probability 0.7 the customer ships to the billing address; the copy
    # keeps the two dicts independent so editing one does not change the other.
    ships_to_billing = random.random() < 0.7
    if ships_to_billing:
        customer["shipping_address"] = dict(customer["billing_address"])
    else:
        customer["shipping_address"] = _random_address()

    return customer

In [7]:
# Cell 4: Generate invoice data
def generate_invoice_data(num_items=None, tax_rate=0.05):
    """Generate a complete invoice with a random selection of cards.

    Args:
        num_items: Number of distinct cards to put on the invoice. When None,
            a random count from 2 to 6 is drawn. An explicit value must lie
            between 0 and the number of entries in ``vintage_mtg_cards``.
        tax_rate: Tax as a fraction of the subtotal (default 0.05, i.e. 5%).

    Returns:
        dict: keys "invoice_number", "invoice_date", "due_date", "customer",
        "items", "subtotal", "tax_rate", "tax_amount" and "grand_total".
        Each entry of "items" has "description", "quantity", "unit_price"
        and "total". Monetary values are rounded to two decimals.

    Raises:
        ValueError: If ``num_items`` is given and is negative or larger than
            the card catalog.
    """
    # Fail fast with a readable message instead of letting random.sample
    # raise after a customer has already been generated.
    catalog_size = len(vintage_mtg_cards)
    if num_items is not None and not 0 <= num_items <= catalog_size:
        raise ValueError(
            f"num_items must be between 0 and {catalog_size}, got {num_items}"
        )

    # Generate customer
    customer = generate_customer_data()

    # Generate invoice details; the due date is 30 days after the invoice date
    invoice_number = f"INV-{random.randint(10000, 99999)}"
    invoice_date = fake.date_between(start_date='-30d', end_date='today')
    due_date = invoice_date + timedelta(days=30)

    # Select random cards (2-6 items unless the caller chose a count)
    if num_items is None:
        num_items = random.randint(2, 6)

    # random.sample picks without replacement, so every line is a distinct card
    selected_cards = random.sample(vintage_mtg_cards, num_items)

    # One invoice line per card with a random quantity of 1-3
    items = []
    for card in selected_cards:
        quantity = random.randint(1, 3)
        items.append({
            "description": f"{card['name']} ({card['set']}) - {card['condition']}",
            "quantity": quantity,
            "unit_price": card["price"],
            "total": round(card["price"] * quantity, 2),
        })

    # Round to cents at every step so the stored figures equal the two-decimal
    # values printed on the invoice and subtotal + tax == grand total exactly.
    subtotal = round(sum(item["total"] for item in items), 2)
    tax_amount = round(subtotal * tax_rate, 2)
    grand_total = round(subtotal + tax_amount, 2)

    return {
        "invoice_number": invoice_number,
        "invoice_date": invoice_date,
        "due_date": due_date,
        "customer": customer,
        "items": items,
        "subtotal": subtotal,
        "tax_rate": tax_rate,
        "tax_amount": tax_amount,
        "grand_total": grand_total,
    }

In [12]:
# Cell 5: Create PDF invoice generator
def _resolve_invoice_path(invoice_data, training_set, filename):
    """Return the PDF output target, creating its parent directory if needed."""
    if filename is None:
        directory = "training" if training_set else "invoices"
        filename = f"{directory}/invoice_{invoice_data['invoice_number']}.pdf"

    # Caller-supplied paths get their directory created too; anything that is
    # not a path (e.g. a file-like object) is passed through untouched.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(filename)
        if parent:
            os.makedirs(parent, exist_ok=True)

    return filename


def _build_header_table(invoice_data):
    """Build the table showing invoice number, invoice date and due date."""
    rows = [
        ['Invoice Number:', invoice_data['invoice_number']],
        ['Invoice Date:', invoice_data['invoice_date'].strftime('%B %d, %Y')],
        ['Due Date:', invoice_data['due_date'].strftime('%B %d, %Y')]
    ]

    table = Table(rows, colWidths=[2*inch, 3*inch])
    table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),  # bold label column
        ('FONTSIZE', (0, 0), (-1, -1), 12),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
    ]))
    return table


def _build_address_table(customer):
    """Build the side-by-side "Bill To" / "Ship To" address table."""
    billing_addr = customer['billing_address']
    shipping_addr = customer['shipping_address']

    # Email and phone appear only in the billing column
    rows = [
        ['Bill To:', 'Ship To:'],
        [customer['name'], customer['name']],
        [billing_addr['street'], shipping_addr['street']],
        [f"{billing_addr['city']}, {billing_addr['state']} {billing_addr['zip_code']}",
         f"{shipping_addr['city']}, {shipping_addr['state']} {shipping_addr['zip_code']}"],
        [customer['email'], ''],
        [customer['phone'], '']
    ]

    table = Table(rows, colWidths=[3*inch, 3*inch])
    table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),  # bold heading row
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ]))
    return table


def _build_items_table(items):
    """Build the line-item table: description, quantity, unit price, total."""
    rows = [['Description', 'Qty', 'Unit Price', 'Total']]
    rows.extend(
        [
            item['description'],
            str(item['quantity']),
            f"${item['unit_price']:,.2f}",
            f"${item['total']:,.2f}"
        ]
        for item in items
    )

    # NOTE(review): these widths add up to 7.2 inches, which looks wider than
    # a letter page minus the document template's margins - confirm that the
    # resulting overflow into the margins is intended before changing it.
    table = Table(rows, colWidths=[4*inch, 0.8*inch, 1.2*inch, 1.2*inch])
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
        ('GRID', (0, 0), (-1, -1), 1, colors.black)
    ]))
    return table


def _build_totals_table(invoice_data):
    """Build the right-aligned subtotal / tax / grand total table."""
    rows = [
        ['Subtotal:', f"${invoice_data['subtotal']:,.2f}"],
        [f'Tax ({invoice_data["tax_rate"]*100:.0f}%):', f"${invoice_data['tax_amount']:,.2f}"],
        ['Grand Total:', f"${invoice_data['grand_total']:,.2f}"]
    ]

    table = Table(rows, colWidths=[5*inch, 1.5*inch])
    table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'RIGHT'),
        ('FONTNAME', (0, -1), (-1, -1), 'Helvetica-Bold'),  # bold grand total row
        ('FONTSIZE', (0, 0), (-1, -1), 12),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('LINEABOVE', (0, -1), (-1, -1), 2, colors.black),  # rule above grand total
    ]))
    return table


def create_pdf_invoice(invoice_data,training_set=False, filename=None):
    """Generate a PDF invoice from invoice data.

    Args:
        invoice_data: Dict with "invoice_number", "invoice_date", "due_date"
            (both dates must support ``strftime``), "customer", "items",
            "subtotal", "tax_rate", "tax_amount" and "grand_total".
        training_set: When ``filename`` is None, selects the output
            directory: "training" if true, otherwise "invoices".
        filename: Output path for the PDF. When None, the path is
            ``<directory>/invoice_<invoice_number>.pdf``. The parent directory
            of a string or path-like filename is created if it is missing.

    Returns:
        The filename the PDF was written to.
    """
    filename = _resolve_invoice_path(invoice_data, training_set, filename)

    # Create document
    doc = SimpleDocTemplate(filename, pagesize=letter)
    styles = getSampleStyleSheet()

    # Company header style, derived from the stock Heading1 style
    company_style = ParagraphStyle(
        'CompanyHeader',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.darkblue,
        spaceAfter=30
    )

    # Flowables in the order they appear on the page
    story = [
        Paragraph("Contoso TCG", company_style),
        Paragraph("Premium Magic: The Gathering Vintage Cards", styles['Normal']),
        Spacer(1, 20),
        _build_header_table(invoice_data),
        Spacer(1, 30),
        _build_address_table(invoice_data['customer']),
        Spacer(1, 30),
        _build_items_table(invoice_data['items']),
        Spacer(1, 20),
        _build_totals_table(invoice_data),
    ]

    # Build PDF
    doc.build(story)
    print(f"Invoice PDF created: {filename}")
    return filename

In [15]:
def generate_multiple_invoices(count=5, training_set=False):
    """Create ``count`` random invoices as PDFs and print a summary of each.

    Args:
        count: How many invoices to generate.
        training_set: Forwarded to ``create_pdf_invoice`` to choose the
            output directory.

    Returns:
        list: The PDF filenames, in creation order.
    """
    created_paths = []

    for position in range(1, count + 1):
        invoice = generate_invoice_data()
        # No filename is passed on purpose: create_pdf_invoice derives the
        # directory and file name from training_set and the invoice number.
        pdf_path = create_pdf_invoice(invoice_data=invoice, training_set=training_set)
        created_paths.append(pdf_path)

        print(f"Generated invoice {position}/{count}: {pdf_path}")
        print(f"  Customer: {invoice['customer']['name']}")
        print(f"  Total: ${invoice['grand_total']:,.2f}")
        print()

    return created_paths

In [None]:
# Generate a single sample PDF; training_set=True writes it to the "training" directory
generate_multiple_invoices(count=1,training_set=True)

In [17]:
# Generate ten invoices to test our custom model; training_set=False writes them to the "invoices" directory
generate_multiple_invoices(count=10,training_set=False)

Invoice PDF created: invoices/invoice_INV-57096.pdf
Generated invoice 1/10: invoices/invoice_INV-57096.pdf
  Customer: Joshua Compton
  Total: $65,441.25

Invoice PDF created: invoices/invoice_INV-28558.pdf
Generated invoice 2/10: invoices/invoice_INV-28558.pdf
  Customer: Michael Hughes
  Total: $32,403.00

Invoice PDF created: invoices/invoice_INV-57347.pdf
Generated invoice 3/10: invoices/invoice_INV-57347.pdf
  Customer: Michelle Jones
  Total: $4,940.25

Invoice PDF created: invoices/invoice_INV-22749.pdf
Generated invoice 4/10: invoices/invoice_INV-22749.pdf
  Customer: Kara Garcia
  Total: $6,237.00

Invoice PDF created: invoices/invoice_INV-38372.pdf
Generated invoice 5/10: invoices/invoice_INV-38372.pdf
  Customer: Renee Perry
  Total: $2,478.00

Invoice PDF created: invoices/invoice_INV-74991.pdf
Generated invoice 6/10: invoices/invoice_INV-74991.pdf
  Customer: Rebecca Smith
  Total: $21,273.00

Invoice PDF created: invoices/invoice_INV-73980.pdf
Generated invoice 7/10: invo

['invoices/invoice_INV-57096.pdf',
 'invoices/invoice_INV-28558.pdf',
 'invoices/invoice_INV-57347.pdf',
 'invoices/invoice_INV-22749.pdf',
 'invoices/invoice_INV-38372.pdf',
 'invoices/invoice_INV-74991.pdf',
 'invoices/invoice_INV-73980.pdf',
 'invoices/invoice_INV-11790.pdf',
 'invoices/invoice_INV-62088.pdf',
 'invoices/invoice_INV-94649.pdf']