# Electric Bill Parser Development

In [34]:
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import csv
import pdfplumber
import re

@dataclass
class BillLineItem:
    """One charge or credit line extracted from an electric bill."""

    bill_date: str                    # bill date string as found in the bill text
    category: str                     # grouping label, e.g. "Delivery" or "Generation"
    description: str                  # human-readable name of the charge
    quantity: Optional[float] = None  # billed quantity; None when the line has none
    unit: Optional[str] = None        # unit of `quantity`, e.g. "kWh" or "days"
    rate: Optional[float] = None      # price per unit; None when the line has none
    amount: float = 0.0               # line total in dollars (negative for credits)

    def to_csv_row(self):
        """Return the item as a flat list suitable for ``csv.writer.writerow``.

        Optional fields that are ``None`` are emitted as empty strings.
        Zero values are preserved: a quantity or rate of ``0.0`` is real data
        and must not be blanked out.
        """
        def blank_if_none(value):
            # Explicit None check: `value or ''` would also erase 0 / 0.0.
            return '' if value is None else value

        return [self.bill_date, self.category, self.description,
                blank_if_none(self.quantity), blank_if_none(self.unit),
                blank_if_none(self.rate), self.amount]

def extract_bill_date(pages, file=None):
    """Extract the bill date from the text of a bill's pages.

    Args:
        pages: Iterable of per-page text strings. ``None`` entries (pages
            with no extractable text) are treated as empty.
        file: Optional path or name of the source PDF, consulted only when
            no date is found in the text. When omitted, a module-level
            ``file`` variable is used if one exists, which preserves the
            original notebook behaviour.

    Returns:
        The first ``NN/NN/NN`` date that follows twelve digits and a space
        in the text, exactly as written there, or ``"Unknown"`` when neither
        the text nor the file name yields a date.

    Raises:
        Exception: If the text has no date but the file name contains a
            ``YYYY_MM_DD`` stamp. This is a deliberate tripwire asking for
            the text patterns to be fixed rather than silently trusting the
            file name.
    """
    all_text = ' '.join(page or '' for page in pages)

    # Text patterns, tried in order; group 1 must capture the date.
    # The twelve digits are presumably the account number -- TODO confirm.
    patterns = [
        r'\d{12} (\d{2}/\d{2}/\d{2})'
    ]

    for pattern in patterns:
        match = re.search(pattern, all_text)
        if match:
            return match.group(1)

    # Fallback: look for a date stamp in the file name, if one is available.
    # An explicit argument wins; otherwise fall back to a notebook-level
    # `file` global instead of failing with NameError when it is undefined.
    if file is None:
        file = globals().get('file')
    if file is None:
        return "Unknown"

    filename_match = re.search(r'(\d{4})_(\d{2})_(\d{2})', str(file))
    if filename_match:
        # NOTE: the original author disabled this fallback on purpose. Its
        # intended result was month/day/year built from the file name, which
        # uses a 4-digit year unlike the text format above.
        raise Exception("warning: we matched filename match instead of date pattern.  fix this code")

    return "Unknown"

def _to_number(token):
    """Convert a captured numeric token such as ``'-$1,234.56'`` to a float."""
    return float(token.replace('$', '').replace(',', ''))


def parse_electric_bill(pages):
    """Extract all recognised line items from the text of an electric bill.

    The page texts are joined with spaces and scanned with one regular
    expression per charge family. Charge names are matched exactly as they
    appear in the patterns below (without internal spaces, e.g.
    ``Basiccharge``).

    Args:
        pages: Iterable of per-page text strings.

    Returns:
        A list of ``BillLineItem`` objects, all stamped with the date from
        ``extract_bill_date(pages)``, grouped in this order: delivery
        time-of-use usage, basic charge, delivery surcharges, generation
        municipal surcharge, Green Power generation usage, energy surcharge.
        Lines that match no pattern are ignored.
    """
    bill_date = extract_bill_date(pages)
    line_items = []
    # `page or ''` keeps the join from failing on a page with no text.
    all_text = ' '.join(page or '' for page in pages)

    # In every pattern the decimal point is escaped and dollar amounts accept
    # thousands separators. An unescaped '.' would capture "$1,234.56" as
    # "1,234", which float() cannot parse.

    # Delivery usage charges (Time of Use)
    delivery_matches = re.findall(
        r'(Onpeak|Midpeak|Offpeak) (\d{1,3}(?:,\d{3})*)kWhx\$(\d+\.\d+) \$(\d[\d,]*\.\d+)',
        all_text)
    for tou, kwh, rate, amount in delivery_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Delivery",
            description=f"{tou} Usage",
            quantity=_to_number(kwh),
            unit="kWh",
            rate=_to_number(rate),
            amount=_to_number(amount)
        ))

    # Basic charge (the rate and amount may have more than one integer digit)
    basic_matches = re.findall(
        r'(Basiccharge) (\d+)daysx\$(\d+\.\d+) \$(\d[\d,]*\.\d+)',
        all_text)
    for _, days, rate, amount in basic_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Delivery",
            description="Basic Charge",
            quantity=float(days),
            unit="days",
            rate=_to_number(rate),
            amount=_to_number(amount)
        ))

    # Delivery surcharges; rate and amount may be negative (credits)
    surcharge_matches = re.findall(
        r'(Baselinecredit|PCIA|CCAwildfirefundcharge|CTC|Fixedrecoverycharge) '
        r'(\d{1,3}(?:,\d{3})*)kWhx(-?\$\d+\.\d+) (-?\$\d[\d,]*\.\d+)',
        all_text)
    for charge_type, kwh, rate, amount in surcharge_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Delivery Surcharge",
            # Re-insert the space the text extraction dropped before the suffix.
            description=charge_type.replace('charge', ' charge').replace('credit', ' credit'),
            quantity=_to_number(kwh),
            unit="kWh",
            rate=_to_number(rate),
            amount=_to_number(amount)
        ))

    # Generation municipal surcharge
    municipal_matches = re.findall(
        r'(GenerationMunicipalSurcharge) \$(\d[\d,]*\.\d+)', all_text)
    for _, amount in municipal_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Generation",
            description="Municipal Surcharge",
            amount=_to_number(amount)
        ))

    # Generation usage charges. `\S*?` is non-greedy and cannot cross
    # whitespace, so two items on one text line are not merged into a single
    # match. The kWh quantity may be written with or without a fraction.
    gen_matches = re.findall(
        r'100%GreenPower-(\S*?-Peak)- (\d[\d,]*(?:\.\d+)?)kWh@(\d+\.\d+) \$(\d[\d,]*\.\d+)',
        all_text)
    for tou, kwh, rate, amount in gen_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Generation",
            description=f"Green Power {tou}",
            quantity=_to_number(kwh),
            unit="kWh",
            rate=_to_number(rate),
            amount=_to_number(amount)
        ))

    # Energy surcharge
    energy_matches = re.findall(r'(EnergySurcharge) \$(\d[\d,]*\.\d+)', all_text)
    for _, amount in energy_matches:
        line_items.append(BillLineItem(
            bill_date=bill_date,
            category="Generation",
            description="Energy Surcharge",
            amount=_to_number(amount)
        ))

    return line_items
         

def get_pages(file):
    """Open the PDF at `file` and return the extracted text of each page, in order."""
    with pdfplumber.open(file) as document:
        page_texts = [pdf_page.extract_text() for pdf_page in document.pages]
    return page_texts


def parse_file(file):
    """Read the bill PDF at `file` and return its parsed line items."""
    return parse_electric_bill(get_pages(file))

In [35]:
# Sample bill used while developing the parser.
# NOTE: extract_bill_date reads this module-level name `file` in its
# filename fallback, so renaming this variable affects that function.
file = "/Users/ericmelz/Data/Bills/Electric/2025/2025_07_09.pdf"

In [36]:
# Parse the sample bill, then total the amounts of every extracted line item
# (the cell output shows that total).
line_items = parse_file(file)
sum(item.amount for item in line_items)

627.18