# Electric Bill Parser Development

In [8]:
import pdfplumber
import re

In [130]:
file = "/Users/ericmelz/Data/Bills/Electric/2025/2025_07_09.pdf"

In [131]:
pages = []

In [132]:
with pdfplumber.open(file) as pdf:
    for page in pdf.pages:
        text = page.extract_text()
        pages.append(text)

In [133]:
len(pages)

6

In [165]:
page3 = pages[3]
page4 = pages[4]
page5 = pages[5]

In [137]:
def extract_delivery_usage_charges(pages):
    data_rows = []
    for page in pages:
        delivery_matches = re.findall(r'(Onpeak|Midpeak|Offpeak) (\d{1,3}(?:,\d{3})*)kWhx\$(\d+\.\d+) \$(\d+.\d+)', page)
        for tou, kwh, rate, amount in delivery_matches:
            data_rows.append([tou, kwh.replace(',', ''), rate, amount])
    return data_rows

In [138]:
delivery_usage_charges = extract_delivery_usage_charges(pages)
delivery_usage_charges

[['Onpeak', '156', '0.34124', '53.23'],
 ['Midpeak', '53', '0.34124', '18.09'],
 ['Offpeak', '1374', '0.29399', '403.94']]

In [139]:
def extract_basic_charge(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(Basiccharge) (\d+)daysx\$(\d\.\d+) \$(\d.\d+)', page)
        for chargetype, days, rate, amount in matches:
            data_rows.append([chargetype, days, rate, amount])
    return data_rows    

In [140]:
basic_charge = extract_basic_charge(pages)
basic_charge

[['Basiccharge', '29', '0.03100', '0.90']]

In [151]:
def extract_delivery_surcharges(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(Baselinecredit|PCIA|CCAwildfirefundcharge|CTC|Fixedrecoverycharge) (\d{1,3}(?:,\d{3})*)kWhx(-?\$\d.\d+) (-?\$\d+.\d+)', page)
        for chargetype, kwh, rate, amount in matches:
            data_rows.append([chargetype, kwh.replace(',', ''), rate.replace('$', ''), amount.replace('$', '')])
    return data_rows    

In [152]:
delivery_surcharges = extract_delivery_surcharges(pages)
delivery_surcharges

[['Baselinecredit', '490', '-0.09250', '-45.33'],
 ['PCIA', '1583', '-0.01227', '-19.42'],
 ['CCAwildfirefundcharge', '1583', '0.00595', '9.42'],
 ['CTC', '1583', '-0.00058', '-0.92'],
 ['Fixedrecoverycharge', '1583', '0.00198', '3.13']]

In [156]:
re.findall(r'(GenerationMunicipalSurcharge) \$(\d+.\d+)', page4)

[('GenerationMunicipalSurcharge', '1.43')]

In [158]:
def extract_generation_municipal_surcharges(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(GenerationMunicipalSurcharge) \$(\d+.\d+)', page)
        for chargetype, amount in matches:
            data_rows.append([chargetype, amount])
    return data_rows    

In [163]:
generation_municipal_surcharge = extract_generation_municipal_surcharges(pages)
generation_municipal_surcharge

[['GenerationMunicipalSurcharge', '1.43']]

In [164]:
sum([float(row[-1]) for row in delivery_usage_charges]) + \
sum([float(row[-1]) for row in basic_charge]) + \
sum([float(row[-1]) for row in delivery_surcharges]) + \
sum([float(row[-1]) for row in generation_municipal_surcharge])

424.46999999999997

In [166]:
page5

"MELZ,ERIC/ Page6of6\nService account 8001779471 POD-ID SUPPLY/GENERATION\nService address 300SREEVESDR 101760940001603559\nCLEANPOWERALLIANCE\nBEVERLYHILLS,CA90212\nRotating outage GroupA037 supplies your electricity\nDetails of your new charges SUM2\nCLEAN POWERALLIANCE\nYourrate:TOU-D-5\nServiceAccount:8001779471\nBillingperiod:06/04/25to07/02/25(29days)\nGeneration Charges\n100%GreenPower-On-Peak- 138.4kWh@0.47105 $65.19\nSummer\n100%GreenPower-Mid-Peak- 53.2kWh@0.24747 $13.17\nSummer\n100%GreenPower-Off-Peak- 1276kWh@0.08431 $107.58\nSummer\n100%GreenPower-On-Peak- 17.2kWh@0.46098 $7.93\nSummer\n100%GreenPower-Mid-Peak- 0kWh@0.24453 $0.00\nSummer\n100%GreenPower-Off-Peak- 97.6kWh@0.08578 $8.37\nSummer\nEnergySurcharge $0.47\nSub-TotalofCPAGenerationCharges $202.71\nYour NewCharges $202.71\nRate Identification Number - RIN SUM2\nUSCA-XXCP-0110-0000\nThisRINmayhelpprogramsmartdevices.Learnmoreatenergy.ca.gov.\nThings you should know\nNew CPA rates go into effect July 1\nCPAimplement

In [177]:
def extract_generation_usage_charges(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'100%GreenPower-(.*-Peak)- (\d+.\d+)kWh@(\d+.\d+) \$(\d+.\d+)', page)
        for tou, kwh, rate, amount in matches:
            data_rows.append([tou, kwh, rate, amount])
    return data_rows    

In [178]:
generation_usage_charges = extract_generation_usage_charges(pages)
generation_usage_charges

[['On-Peak', '138.4', '0.47105', '65.19'],
 ['Mid-Peak', '53.2', '0.24747', '13.17'],
 ['Off-Peak', '1276', '0.08431', '107.58'],
 ['On-Peak', '17.2', '0.46098', '7.93'],
 ['Off-Peak', '97.6', '0.08578', '8.37']]

In [179]:
re.findall(r'(EnergySurcharge) \$(\d+.\d+)', page5)

[('EnergySurcharge', '0.47')]

In [185]:
def extract_energy_surcharge(pages):
    data_rows = []
    for page in pages:
        matches = re.findall(r'(EnergySurcharge) \$(\d+.\d+)', page)
        for charge, amount in matches:
            data_rows.append([charge, amount])
    return data_rows    

In [186]:
extract_energy_surcharge(pages)

[['EnergySurcharge', '0.47']]

In [188]:
def compute_total(pages):
    delivery_usage_charges = extract_delivery_usage_charges(pages)
    basic_charge = extract_basic_charge(pages)
    delivery_surcharges = extract_delivery_surcharges(pages)
    generation_municipal_surchage = extract_generation_municipal_surcharges(pages)
    generation_usage_charges = extract_generation_usage_charges(pages)
    energy_surchage = extract_energy_surcharge(pages)
    return sum([float(row[-1]) for row in delivery_usage_charges]) + \
        sum([float(row[-1]) for row in basic_charge]) + \
        sum([float(row[-1]) for row in delivery_surcharges]) + \
        sum([float(row[-1]) for row in generation_municipal_surcharge]) + \
        sum([float(row[-1]) for row in generation_usage_charges]) + \
        sum([float(row[-1]) for row in energy_surchage])

In [189]:
compute_total(pages)

627.1800000000001