In [4]:
# geospatial analysis
import geopandas as gpd
import restapi # downloading data

# working with data
import numpy as np

# web scraping
import requests
from bs4 import BeautifulSoup
import time
import re

## Electric Grid Headroom

### Downloading SCE

Other relevant links for Southern California Edison:

https://drpep-sce2.opendata.arcgis.com/maps/f76174d459a14545b2b13110d1cc6fba/about

Tutorial: https://drpep.sce.com/drpep/cdn/19/drpep-interactive-user-guide/index.html#/

In [2]:
# download Southern California Edison data
# sce_url = "https://drpep.sce.com/arcgis_server/rest/services/Hosted/AVL_LOAD_CAP_TOGGLE/FeatureServer/1"
sce_url = "https://drpep.sce.com/arcgis_server/rest/services/Hosted/AVL_LOAD_CAP_TOGGLE/FeatureServer/0"
sce_layer = restapi.FeatureLayer(sce_url)
# sce_layer.export_layer("Data/Hosting_Capacity/sce.geojson")



In [34]:
sce_layer.list_fields()

['circuit_name',
 'substation_name',
 'date_last_update',
 'plan_year',
 'system_name',
 'a_bank',
 'alc_year1',
 'alc_year2',
 'alc_year3',
 'alc_year4',
 'alc_year5',
 'subst_alc_year1',
 'subst_alc_year2',
 'subst_alc_year3',
 'subst_alc_year4',
 'subst_alc_year5',
 'a_bank_alc_year1',
 'a_bank_alc_year2',
 'a_bank_alc_year3',
 'a_bank_alc_year4',
 'a_bank_alc_year5',
 'circuit_voltage',
 'org_alc_year1',
 'org_alc_year2',
 'org_alc_year3',
 'org_alc_year4',
 'org_alc_year5',
 'ckt_capacity_y1',
 'ckt_capacity_y2',
 'ckt_capacity_y3',
 'ckt_capacity_y4',
 'ckt_capacity_y5',
 'subst_capacity_y1',
 'subst_capacity_y2',
 'subst_capacity_y3',
 'subst_capacity_y4',
 'subst_capacity_y5',
 'subst_org_alc_year1',
 'subst_org_alc_year2',
 'subst_org_alc_year3',
 'subst_org_alc_year4',
 'subst_org_alc_year5',
 'subtrans',
 'subtrans_alc_year1',
 'subtrans_alc_year2',
 'subtrans_alc_year3',
 'subtrans_alc_year4',
 'subtrans_alc_year5',
 'y1ckt_f',
 'y2ckt_f',
 'y3ckt_f',
 'y4ckt_f',
 'y5ckt_f'

In [28]:
cursor = restapi.SearchCursor(sce_layer, ["objectid"])

In [None]:
# verify SCE data downloaded properly
sce_alc = gpd.read_file("Data/Hosting_Capacity/SCE/Available Load Capacity.geojson")

# for simplicity, let's say year 1 (2024)
sce_alc.drop(columns=sum([[f"alc_year{i}", f"a_bank_alc_year{i}", f"org_alc_year{i}", f"ckt_capacity_y{i}", f"subst_capacity_y{i}", f"subst_org_alc_year{i}", f"subtrans_alc_year{i}", f"y{i}ckt_f", f"y{i}sub_f", f"y{i}subt_f", f"y{i}abank_f", f"y{i}ckt_sub_f", f"y{i}ckt_subt_f", f"y{i}ckt_abank_f", f"y{i}sub_subt_f", f"y{i}sub_abank_f", f"y{i}subt_abank_f", f"y{i}ckt_sub_subt_f", f"y{i}ckt_sub_abank_f", f"y{i}ckt_subt_abank_f", f"y{i}sub_subt_abank_f", f"y{i}ckt_sub_subt_abank_f"] for i in range(2,6)], []), inplace=True)

# sce_alc.to_file("Data/SCE_available_load_capacity.geojson", driver="GeoJSON") # somehow the file size becomes bigger after this

### Downloading PGE

In [20]:
# verify PG&E data download properly
pge_ica = gpd.read_file("Data/Hosting_Capacity/PG&E/ICA_Results/LineDetail.shp")

### Downloading SDG&E

In [2]:
# verify SDG&E data download properly
sdge_ica = gpd.read_file("Data/Hosting_Capacity/SDG&E/ICA_MAP_PROD_LoadCapacityGrids_VW_-864651790911740139.geojson")

In [3]:
sdge_ica

Unnamed: 0,OBJECTID,JOIN_COUNT,TARGET_FID,JOIN_FID,SHAPE_LENG,VOLTAGE,LINE_SEGMENT_NUMBER,ICAWOF_UNIGENERATION,ICAWOF_UNILOAD,ICAWOF_PVGENERATION,OHUG,CIRCUIT_NAME,LABELTEXT,ICAWNOF_UNIGENERATION,ICAWNOF_UNILOAD,ICAWNOF_PVGENERATION,LABELTEXT_LCA,RESTRICTED,SUBID,geometry
0,28922611,,,,,12,141921,0.0,0.8,0.2,OH,456,,0.0,0.8,0.5,Up To 1.00,N,ASH,"POLYGON ((-117.06446 33.13968, -117.06446 33.1..."
1,28922612,,,,,12,141921,0.0,0.8,0.2,OH,456,,0.0,0.8,0.5,Up To 1.00,N,ASH,"POLYGON ((-117.06527 33.13968, -117.06527 33.1..."
2,28922613,,,,,12,141921,0.0,0.8,0.2,OH,456,,0.0,0.8,0.5,Up To 1.00,N,ASH,"POLYGON ((-117.06527 33.13934, -117.06527 33.1..."
3,28922614,,,,,12,139746,0.0,0.0,0.2,OH,456,,0.7,0.0,0.9,0,N,ASH,"POLYGON ((-117.05465 33.14209, -117.05465 33.1..."
4,28922615,,,,,12,139746,0.0,0.0,0.2,OH,456,,0.7,0.0,0.9,0,N,ASH,"POLYGON ((-117.05465 33.14175, -117.05465 33.1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
477050,29399661,,,,,12,177379,0.0,0.2,0.1,OH,209,,0.0,0.2,0.1,Up To 1.00,N,MELROSE,"POLYGON ((-117.2101 33.23231, -117.2101 33.232..."
477051,29399662,,,,,12,177379,0.0,0.2,0.1,OH,209,,0.0,0.2,0.1,Up To 1.00,N,MELROSE,"POLYGON ((-117.2101 33.23197, -117.2101 33.232..."
477052,29399663,,,,,12,179642,0.0,0.2,0.1,OH,209,,0.0,0.2,0.1,Up To 1.00,N,MELROSE,"POLYGON ((-117.20929 33.23334, -117.20929 33.2..."
477053,29399664,,,,,12,179642,0.0,0.2,0.1,OH,209,,0.0,0.2,0.1,Up To 1.00,N,MELROSE,"POLYGON ((-117.20929 33.233, -117.20929 33.233..."


## Electricity Cost

In [5]:
# California counties
us_counties = gpd.read_file("Data/Boundaries/cb_2018_us_county_500k/cb_2018_us_county_500k.shp")
ca_counties = us_counties[us_counties["STATEFP"] == "06"]

In [8]:
county_regex = re.compile(rf"The average residential electricity rate in Alameda County, CA is (\d+) ¢/kWh")

matching = county_regex.findall("The average residential electricity rate in Alameda County, CA is 30 ¢/kWh")

for matches in matching:
    print(matches)

30


In [None]:
# get cost for each California county

ca_counties["Electricity Price (cents/kWh)"] = np.nan # initialize the column.

# find EnergySage URL
energy_sage_ca = "https://www.energysage.com/local-data/electricity-cost/ca/"

for idx, county in ca_counties.iterrows():
    county_suffix = county["NAME"].replace(" ", "-").lower() + "-county" # convert county name to URL form

    county_energy_sage_url = energy_sage_ca + county_suffix

    county_energy_sage_html = requests.get(county_energy_sage_url) # retrieve website content

    # use regular expressions to extract electricity price
    county_regex = re.compile(rf"The average residential electricity rate in {county['NAME']} County, CA is (\d+) ¢/kWh")

    try:
        county_energy_sage_html = requests.get(county_energy_sage_url)
        matching = county_regex.findall(county_energy_sage_html.text)

        if len(matching) > 1:
            print(f"{county['NAME']} has more than one match.")
        elif len(matching) == 0:
            print(f"{county['NAME']} has no matches.")

        for matches in matching:
            ca_counties.loc[idx, 'Electricity Price (cents/kWh)'] = float(matches)

    except:
        print(county_suffix)

    time.sleep(1)


ca_counties["Electricity Price ($/MWh)"] = ca_counties["Electricity Price (cents/kWh)"]*10 # convert units (cents/kWh) to ($/MWh)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


STATEFP                                                                         06
COUNTYFP                                                                       001
COUNTYNS                                                                  01675839
AFFGEOID                                                            0500000US06001
GEOID                                                                        06001
NAME                                                                       Alameda
LSAD                                                                            06
ALAND                                                                   1909598013
AWATER                                                                   216923745
geometry                         POLYGON ((-122.342253 37.805558, -122.33411840...
Electricity Price (cents/kWh)                                                  NaN
Name: 37, dtype: object


  ca_counties.loc[idx, 'Electricity Price (cents/kWh)'] = matches


STATEFP                                                                         06
COUNTYFP                                                                       003
COUNTYNS                                                                  01675840
AFFGEOID                                                            0500000US06003
GEOID                                                                        06003
NAME                                                                        Alpine
LSAD                                                                            06
ALAND                                                                   1912292630
AWATER                                                                    12557304
geometry                         POLYGON ((-120.07239200000001 38.702767, -120....
Electricity Price (cents/kWh)                                                  NaN
Name: 38, dtype: object
STATEFP                                                        

In [18]:
ca_counties["Electricity Price ($/MWh)"] = ca_counties["Electricity Price (cents/kWh)"].astype(float)*10

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [20]:
ca_counties.to_file("Data/Boundaries/ca_counties.geojson", driver="GeoJSON")

### Solar + Wind Data (Emissions, Water Scarcity, Cost)