# Electric Bill Parser Development

In [8]:
import pdfplumber
import re

In [130]:
file = "/Users/ericmelz/Data/Bills/Electric/2025/2025_07_09.pdf"

In [131]:
pages = []

In [132]:
with pdfplumber.open(file) as pdf:
    for page in pdf.pages:
        text = page.extract_text()
        pages.append(text)

In [133]:
len(pages)

6

In [135]:
page3 = pages[3]

In [136]:
page4 = pages[4]

In [137]:
def extract_delivery_usage_charges(pages):
    data_rows = []
    for page in pages:
        delivery_matches = re.findall(r'(Onpeak|Midpeak|Offpeak) (\d{1,3}(?:,\d{3})*)kWhx\$(\d+\.\d+) \$(\d+.\d+)', page)
        for tou, kwh, rate, amount in delivery_matches:
            data_rows.append([tou, kwh.replace(',', ''), rate, amount])
    return data_rows

In [138]:
delivery_usage_charges = extract_delivery_usage_charges(pages)
delivery_usage_charges

[['Onpeak', '156', '0.34124', '53.23'],
 ['Midpeak', '53', '0.34124', '18.09'],
 ['Offpeak', '1374', '0.29399', '403.94']]

In [139]:
def extract_basic_charge(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(Basiccharge) (\d+)daysx\$(\d\.\d+) \$(\d.\d+)', page)
        for chargetype, days, rate, amount in matches:
            data_rows.append([chargetype, days, rate, amount])
    return data_rows    

In [140]:
basic_charge = extract_basic_charge(pages)
basic_charge

[['Basiccharge', '29', '0.03100', '0.90']]

In [151]:
def extract_delivery_surcharges(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(Baselinecredit|PCIA|CCAwildfirefundcharge|CTC|Fixedrecoverycharge) (\d{1,3}(?:,\d{3})*)kWhx(-?\$\d.\d+) (-?\$\d+.\d+)', page)
        for chargetype, kwh, rate, amount in matches:
            data_rows.append([chargetype, kwh.replace(',', ''), rate.replace('$', ''), amount.replace('$', '')])
    return data_rows    

In [152]:
delivery_surcharges = extract_delivery_surcharges(pages)
delivery_surcharges

[['Baselinecredit', '490', '-0.09250', '-45.33'],
 ['PCIA', '1583', '-0.01227', '-19.42'],
 ['CCAwildfirefundcharge', '1583', '0.00595', '9.42'],
 ['CTC', '1583', '-0.00058', '-0.92'],
 ['Fixedrecoverycharge', '1583', '0.00198', '3.13']]

In [156]:
re.findall(r'(GenerationMunicipalSurcharge) \$(\d+.\d+)', page4)

[('GenerationMunicipalSurcharge', '1.43')]

In [158]:
def extract_generation_municipal_surcharges(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(GenerationMunicipalSurcharge) \$(\d+.\d+)', page)
        for chargetype, amount in matches:
            data_rows.append([chargetype, amount])
    return data_rows    

In [163]:
generation_municipal_surcharge = extract_generation_municipal_surcharges(pages)
generation_municipal_surcharge

[['GenerationMunicipalSurcharge', '1.43']]

In [164]:
sum([float(row[-1]) for row in delivery_usage_charges]) + \
sum([float(row[-1]) for row in basic_charge]) + \
sum([float(row[-1]) for row in delivery_surcharges]) + \
sum([float(row[-1]) for row in generation_municipal_surcharge])

424.46999999999997