# California County Electricity Rates Scraper

This notebook scrapes electricity rates by county from the MyKWhNow California Rates Map.

**Source**: https://mykwhnow.com/tools/california-rates

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import time
import re
from datetime import datetime

In [2]:
# All 58 California Counties
CA_COUNTIES = [
    'Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa', 'Contra Costa',
    'Del Norte', 'El Dorado', 'Fresno', 'Glenn', 'Humboldt', 'Imperial', 'Inyo',
    'Kern', 'Kings', 'Lake', 'Lassen', 'Los Angeles', 'Madera', 'Marin', 'Mariposa',
    'Mendocino', 'Merced', 'Modoc', 'Mono', 'Monterey', 'Napa', 'Nevada', 'Orange',
    'Placer', 'Plumas', 'Riverside', 'Sacramento', 'San Benito', 'San Bernardino',
    'San Diego', 'San Francisco', 'San Joaquin', 'San Luis Obispo', 'San Mateo',
    'Santa Barbara', 'Santa Clara', 'Santa Cruz', 'Shasta', 'Sierra', 'Siskiyou',
    'Solano', 'Sonoma', 'Stanislaus', 'Sutter', 'Tehama', 'Trinity', 'Tulare',
    'Tuolumne', 'Ventura', 'Yolo', 'Yuba'
]

print(f"Total California Counties: {len(CA_COUNTIES)}")
print(f"Counties: {', '.join(CA_COUNTIES[:10])}...")


Total California Counties: 58
Counties: Alameda, Alpine, Amador, Butte, Calaveras, Colusa, Contra Costa, Del Norte, El Dorado, Fresno...


# Helper Functions

def categorize_rate(rate):
    if rate is None:
        return 'Unknown'
    elif rate < 0.39:
        return 'Lowest'
    elif rate < 0.46:
        return 'Low'
    elif rate == 0.46:
        return 'Medium'
    else:
        return 'Highest'

print("Helper function defined")

In [3]:
def categorize_rate(rate):
    if rate is None:
        return 'Unknown'
    elif rate < 0.39:
        return 'Lowest'
    elif rate < 0.46:
        return 'Low'
    elif rate == 0.46:
        return 'Medium'
    else:
        return 'Highest'

In [4]:
# Create template DataFrame for all 58 California counties
county_rates = []

for county in CA_COUNTIES:
    county_rates.append({
        'county_name': county,
        'rate_per_kwh': None,
        'rate_tier': None,
        'utility_code': None,
        'last_updated': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    })

df_county_rates = pd.DataFrame(county_rates)

print("County rates template created")
print(f"Total counties: {len(df_county_rates)}")
print("\nTemplate preview:")
df_county_rates.head(10)


County rates template created
Total counties: 58

Template preview:


Unnamed: 0,county_name,rate_per_kwh,rate_tier,utility_code,last_updated
0,Alameda,,,,2025-11-30 10:41:46
1,Alpine,,,,2025-11-30 10:41:46
2,Amador,,,,2025-11-30 10:41:46
3,Butte,,,,2025-11-30 10:41:46
4,Calaveras,,,,2025-11-30 10:41:46
5,Colusa,,,,2025-11-30 10:41:46
6,Contra Costa,,,,2025-11-30 10:41:46
7,Del Norte,,,,2025-11-30 10:41:46
8,El Dorado,,,,2025-11-30 10:41:46
9,Fresno,,,,2025-11-30 10:41:46


In [5]:
# Manually enter county rates here
# Based on MyKWhNow California Rates Map

# Counties with rate 0.3869
counties_0_3869 = ['San Bernardino', 'Riverside', 'Orange', 'Ventura', 'Inyo', 'Mono', 'Sacramento']
for county in counties_0_3869:
    df_county_rates.loc[df_county_rates['county_name'] == county, 'rate_per_kwh'] = 0.3869

# Counties with rate 0.30
counties_0_30 = ['Los Angeles', 'Imperial', 'Alpine', 'Del Norte', 'Siskiyou', 'Modoc']
for county in counties_0_30:
    df_county_rates.loc[df_county_rates['county_name'] == county, 'rate_per_kwh'] = 0.30

# All remaining counties get rate 0.4597
remaining_counties = [c for c in CA_COUNTIES if c not in counties_0_3869 and c not in counties_0_30]
for county in remaining_counties:
    df_county_rates.loc[df_county_rates['county_name'] == county, 'rate_per_kwh'] = 0.4597

# Apply rate tier categorization
df_county_rates['rate_tier'] = df_county_rates['rate_per_kwh'].apply(categorize_rate)

print("Rates updated for all 58 California counties")
print(f"\nCounties with rates: {df_county_rates['rate_per_kwh'].notna().sum()}")
print(f"Counties without rates: {df_county_rates['rate_per_kwh'].isna().sum()}")

print("\nRate Distribution:")
print(df_county_rates['rate_per_kwh'].value_counts().sort_index())

print("\nRate Tier Distribution:")
print(df_county_rates['rate_tier'].value_counts())


Rates updated for all 58 California counties

Counties with rates: 58
Counties without rates: 0

Rate Distribution:
rate_per_kwh
0.3        6
0.3869     7
0.4597    45
Name: count, dtype: int64

Rate Tier Distribution:
rate_tier
Low       45
Lowest    13
Name: count, dtype: int64


In [6]:
# View current status
print("Current Dataset Status:")
print("=" * 60)
print(f"Total counties: {len(df_county_rates)}")
print(f"Counties with rates: {df_county_rates['rate_per_kwh'].notna().sum()}")
print(f"Counties without rates: {df_county_rates['rate_per_kwh'].isna().sum()}")

if df_county_rates['rate_per_kwh'].notna().sum() > 0:
    print(f"\nRate Statistics:")
    print(df_county_rates['rate_per_kwh'].describe())
    
    print(f"\nRate Tier Distribution:")
    print(df_county_rates['rate_tier'].value_counts())

if df_county_rates['rate_per_kwh'].isna().sum() > 0:
    print(f"\nCounties still missing rates:")
    missing_counties = df_county_rates[df_county_rates['rate_per_kwh'].isna()]['county_name'].tolist()
    print(f"Total: {len(missing_counties)}")
    print(missing_counties)


Current Dataset Status:
Total counties: 58
Counties with rates: 58
Counties without rates: 0

Rate Statistics:
count     58.0000
unique     3.0000
top        0.4597
freq      45.0000
Name: rate_per_kwh, dtype: float64

Rate Tier Distribution:
rate_tier
Low       45
Lowest    13
Name: count, dtype: int64


In [7]:
# Save county rates to CSV
output_file = 'ca_county_rates.csv'
df_county_rates.to_csv(output_file, index=False)

print(f"County rates saved to: {output_file}")
print(f"\nDataset Summary:")
print(f"- Total counties: {len(df_county_rates)}")
print(f"- Counties with rates: {df_county_rates['rate_per_kwh'].notna().sum()}")
print(f"- Columns: {list(df_county_rates.columns)}")

# Display the full dataset
print(f"\nAll County Rates:")
df_county_rates


County rates saved to: ca_county_rates.csv

Dataset Summary:
- Total counties: 58
- Counties with rates: 58
- Columns: ['county_name', 'rate_per_kwh', 'rate_tier', 'utility_code', 'last_updated']

All County Rates:


Unnamed: 0,county_name,rate_per_kwh,rate_tier,utility_code,last_updated
0,Alameda,0.4597,Low,,2025-11-30 10:41:46
1,Alpine,0.3,Lowest,,2025-11-30 10:41:46
2,Amador,0.4597,Low,,2025-11-30 10:41:46
3,Butte,0.4597,Low,,2025-11-30 10:41:46
4,Calaveras,0.4597,Low,,2025-11-30 10:41:46
5,Colusa,0.4597,Low,,2025-11-30 10:41:46
6,Contra Costa,0.4597,Low,,2025-11-30 10:41:46
7,Del Norte,0.3,Lowest,,2025-11-30 10:41:46
8,El Dorado,0.4597,Low,,2025-11-30 10:41:46
9,Fresno,0.4597,Low,,2025-11-30 10:41:46


In [8]:
# Load and Process ZIP Code to County Mappings

import glob
import os

# Get all ZIP code mapping files
zip_files = glob.glob('zip_code_to_county/zip_county_fips_*.csv')
print(f"Found {len(zip_files)} ZIP code mapping files")

# Load the most recent file
if zip_files:
    latest_file = sorted(zip_files)[-1]
    print(f"Using: {latest_file}")
    
    df_zip = pd.read_csv(latest_file)
    print(f"\nLoaded {len(df_zip)} ZIP code mappings")
    print(f"Columns: {list(df_zip.columns)}")
    print("\nSample data:")
    print(df_zip.head())
else:
    print("No ZIP code mapping files found!")


Found 31 ZIP code mapping files
Using: zip_code_to_county/zip_county_fips_2018_03.csv

Loaded 53962 ZIP code mappings
Columns: ['zip', 'stcountyfp', 'city', 'state', 'countyname', 'classfp']

Sample data:
     zip  stcountyfp           city state      countyname classfp
0  36091        1001        Verbena    AL  Autauga County      H1
1  36758        1001  Plantersville    AL  Autauga County      H1
2  36006        1001    Billingsley    AL  Autauga County      H1
3  36067        1001     Prattville    AL  Autauga County      H1
4  36701        1001          Selma    AL  Autauga County      H1


In [9]:
# Filter for California ZIP codes only

# California state FIPS code is 06
df_ca_zip = df_zip[df_zip['state'] == 'CA'].copy()

print(f"California ZIP codes: {len(df_ca_zip)}")
print(f"Unique counties: {df_ca_zip['countyname'].nunique()}")

# Clean county names (remove " County" suffix)
df_ca_zip['county_clean'] = df_ca_zip['countyname'].str.replace(' County', '').str.strip()

print("\nCalifornia counties in ZIP data:")
ca_counties_in_data = sorted(df_ca_zip['county_clean'].unique())
print(f"Total: {len(ca_counties_in_data)}")
print(ca_counties_in_data[:10])


California ZIP codes: 2645
Unique counties: 58

California counties in ZIP data:
Total: 58
['Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa', 'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno']


In [10]:
# Create clean ZIP to county mapping DataFrame
df_ca_zip_clean = df_ca_zip[['zip', 'county_clean']].copy()
df_ca_zip_clean.columns = ['zip_code', 'county_name']

# Remove duplicates (keep first occurrence)
df_ca_zip_clean = df_ca_zip_clean.drop_duplicates(subset=['zip_code'])

print(f"Clean ZIP to county mapping created")
print(f"Total unique ZIP codes: {len(df_ca_zip_clean)}")
print(f"\nSample mapping:")
print(df_ca_zip_clean.head(10))

# Save to CSV
df_ca_zip_clean.to_csv('ca_zip_to_county.csv', index=False)
print(f"\nSaved to: ca_zip_to_county.csv")


Clean ZIP to county mapping created
Total unique ZIP codes: 2447

Sample mapping:
      zip_code county_name
2904     94501     Alameda
2905     94560     Alameda
2906     94577     Alameda
2907     94578     Alameda
2908     94550     Alameda
2909     94611     Alameda
2910     94705     Alameda
2911     94505     Alameda
2912     94568     Alameda
2913     94551     Alameda

Saved to: ca_zip_to_county.csv


In [11]:
# Load EV Charging Stations Data
print("Loading EV charging stations data...")

# Load the cleaned February 2024 stations data
stations_file = '../ev_charging_stations/ev_charging_stations_feb2024_cleaned.csv'

try:
    df_stations = pd.read_csv(stations_file)
    print(f"Loaded {len(df_stations)} charging stations")
    print(f"Columns: {list(df_stations.columns)}")
except FileNotFoundError:
    print(f"Error: Could not find {stations_file}")
    print("Make sure the ev_charging_stations data has been processed first")


Loading EV charging stations data...
Loaded 65134 charging stations
Columns: ['Station Name', 'Street Address', 'City', 'State', 'ZIP', 'EV Level1 EVSE Num', 'EV Level2 EVSE Num', 'EV DC Fast Count', 'EV Network', 'EV Connector Types', 'Access Code', 'Facility Type', 'Total_Chargers', 'Has_Level1', 'Has_Level2', 'Has_DC_Fast']


  df_stations = pd.read_csv(stations_file)


In [12]:
# Filter for California stations only
df_ca_stations = df_stations[df_stations['State'] == 'CA'].copy()

print(f"California charging stations: {len(df_ca_stations)}")
print(f"\nCalifornia stations by city (top 10):")
print(df_ca_stations['City'].value_counts().head(10))


California charging stations: 16455

California stations by city (top 10):
City
Los Angeles      1563
San Diego         712
Irvine            643
San Jose          570
San Francisco     541
Menlo Park        380
Sacramento        368
Santa Clara       353
Long Beach        228
Oakland           225
Name: count, dtype: int64


In [13]:
# Map ZIP codes to counties for charging stations
print("Mapping charging stations to counties...")

# Merge stations with ZIP to county mapping
df_stations_with_county = df_ca_stations.merge(
    df_ca_zip_clean[['zip_code', 'county_name']], 
    left_on='ZIP', 
    right_on='zip_code', 
    how='left'
)

print(f"Stations mapped to counties: {df_stations_with_county['county_name'].notna().sum()}")
print(f"Stations without county mapping: {df_stations_with_county['county_name'].isna().sum()}")

if df_stations_with_county['county_name'].isna().sum() > 0:
    print(f"\nStations with missing county (showing first 10):")
    print(df_stations_with_county[df_stations_with_county['county_name'].isna()][['Station Name', 'City', 'ZIP']].head(10))


Mapping charging stations to counties...
Stations mapped to counties: 9528
Stations without county mapping: 6927

Stations with missing county (showing first 10):
                                           Station Name                 City  \
1753              Fogline Vineyards - Tesla Destination               Fulton   
2055  Four Seasons Hotel Los Angeles at Beverly Hill...          Los Angeles   
2184                                       Beacon Solar               Cantil   
9531                               3651 S. Vermont Ave.          Los Angeles   
9532                                76 - Valley Village       Valley Village   
9533   South San Francisco - Miller Garage HPCP Array 1  South San Francisco   
9534                                         SOMA Grand        San Francisco   
9535         BOA Mission-Sorenson CA4-160 (Hayward, CA)              Hayward   
9536                       French Valley Village Center           Winchester   
9537                                 

In [14]:
# Add electricity rates to stations based on county
print("Adding electricity rates to charging stations...")

# Merge with county rates
df_final_stations = df_stations_with_county.merge(
    df_county_rates[['county_name', 'rate_per_kwh', 'rate_tier']], 
    on='county_name', 
    how='left'
)

# Rename rate column for clarity
df_final_stations.rename(columns={'rate_per_kwh': 'electricity_rate_per_kwh'}, inplace=True)

print(f"Stations with electricity rates: {df_final_stations['electricity_rate_per_kwh'].notna().sum()}")
print(f"\nDataset columns: {list(df_final_stations.columns)}")

# Show summary
print(f"\nElectricity Rate Distribution:")
print(df_final_stations['electricity_rate_per_kwh'].value_counts().sort_index())

print(f"\nStations by County (top 10):")
print(df_final_stations['county_name'].value_counts().head(10))


Adding electricity rates to charging stations...
Stations with electricity rates: 9528

Dataset columns: ['Station Name', 'Street Address', 'City', 'State', 'ZIP', 'EV Level1 EVSE Num', 'EV Level2 EVSE Num', 'EV DC Fast Count', 'EV Network', 'EV Connector Types', 'Access Code', 'Facility Type', 'Total_Chargers', 'Has_Level1', 'Has_Level2', 'Has_DC_Fast', 'zip_code', 'county_name', 'electricity_rate_per_kwh', 'rate_tier']

Electricity Rate Distribution:
electricity_rate_per_kwh
0.3       2206
0.3869    2064
0.4597    5258
Name: count, dtype: int64

Stations by County (top 10):
county_name
Los Angeles       2175
Orange            1109
Santa Clara       1100
San Mateo          749
San Diego          680
Alameda            576
Riverside          348
Sacramento         261
Contra Costa       230
San Bernardino     201
Name: count, dtype: int64


In [15]:
# Save the final dataset
output_file = 'ev_charging_stations_county_prices.csv'
df_final_stations.to_csv(output_file, index=False)

print(f"Dataset saved to: {output_file}")
print(f"\nFinal Dataset Summary:")
print(f"- Total California charging stations: {len(df_final_stations)}")
print(f"- Stations with county info: {df_final_stations['county_name'].notna().sum()}")
print(f"- Stations with electricity rates: {df_final_stations['electricity_rate_per_kwh'].notna().sum()}")
print(f"- Total columns: {len(df_final_stations.columns)}")

print(f"\nSample data:")
df_final_stations[['Station Name', 'City', 'county_name', 'electricity_rate_per_kwh', 'rate_tier']].head(10)


Dataset saved to: ev_charging_stations_county_prices.csv

Final Dataset Summary:
- Total California charging stations: 16455
- Stations with county info: 9528
- Stations with electricity rates: 9528
- Total columns: 20

Sample data:


Unnamed: 0,Station Name,City,county_name,electricity_rate_per_kwh,rate_tier
0,LADWP - Truesdale Center,Sun Valley,Los Angeles,0.3,Lowest
1,Los Angeles Convention Center,Los Angeles,Los Angeles,0.3,Lowest
2,LADWP - John Ferraro Building,Los Angeles,Los Angeles,0.3,Lowest
3,LADWP - Haynes Power Plant,Long Beach,Los Angeles,0.3,Lowest
4,LADWP - Harbor Generating Station,Wilmington,Los Angeles,0.3,Lowest
5,LADWP - Sylmar West,Sylmar,Los Angeles,0.3,Lowest
6,LADWP - EV Service Center,Los Angeles,Los Angeles,0.3,Lowest
7,LADWP - Fairfax Center,Los Angeles,Los Angeles,0.3,Lowest
8,California Air Resources Board,El Monte,Los Angeles,0.3,Lowest
9,LADWP - Palmetto Center,Los Angeles,Los Angeles,0.3,Lowest


In [16]:
# Verify county names match between datasets
print("Checking county name consistency...")

zip_counties = set(df_ca_zip_clean['county_name'].unique())
rate_counties = set(CA_COUNTIES)

print(f"\nCounties in ZIP data: {len(zip_counties)}")
print(f"Counties in rate template: {len(rate_counties)}")

# Counties in ZIP data but not in rate template
missing_in_rates = zip_counties - rate_counties
if missing_in_rates:
    print(f"\nCounties in ZIP data but not in rate list: {missing_in_rates}")

# Counties in rate template but not in ZIP data  
missing_in_zip = rate_counties - zip_counties
if missing_in_zip:
    print(f"\nCounties in rate list but not in ZIP data: {missing_in_zip}")

if not missing_in_rates and not missing_in_zip:
    print("\nAll county names match perfectly!")


Checking county name consistency...

Counties in ZIP data: 58
Counties in rate template: 58

All county names match perfectly!
