In [52]:
# geospatial analysis
import geopandas as gpd
import shapely

# data analysis
import pandas as pd

# web scraping
import requests
from bs4 import BeautifulSoup
import time
import re

In [38]:
# essential variables

# Every layer is reprojected to EPSG:5070 for more accurate area measurements.
TARGET_CRS = "EPSG:5070"

# boundary for state of California
us_states = gpd.read_file(
    "Data/Boundaries/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
).to_crs(TARGET_CRS)
ca_state = us_states[us_states["STUSPS"] == "CA"]

# California counties
# Reprojected like the other layers so they can be combined with them directly.
# to_crs returns a new frame, so ca_counties is independent of us_counties and
# columns can be added to it later without chained-assignment warnings.
us_counties = gpd.read_file("Data/Boundaries/cb_2018_us_county_500k/cb_2018_us_county_500k.shp")
ca_counties = us_counties[us_counties["STATEFP"] == "06"].to_crs(TARGET_CRS)

# HUC8 subbasins
huc8 = gpd.read_file("Data/Boundaries/HUC8_CONUS/HUC8_US.shp").to_crs(TARGET_CRS)

# Flag subbasins whose STATES value mentions "CA" (plain substring test).
# na=False treats a missing STATES value as "not in California" instead of raising.
huc8["CA"] = huc8["STATES"].str.contains("CA", regex=False, na=False)
huc8 = huc8[huc8["CA"]]

# intersect with California
huc8_ca = gpd.clip(huc8, ca_state)

### Land Restrictions

In summary, we can basically ignore land property restrictions.

We haven't investigated hosting capacity yet.

In [None]:
# Skipping utility service territories for now to save time; they are probably not the most important restriction.
# # utilities map in California
# ca_utilities = gpd.read_file("Data/Boundaries/ElectricLoadServingEntities_IOU_POU_-6992589901327565970.geojson")

In [None]:
# Federal, state, and local lands.
# California has few tribal lands, so those are not considered here.

if False:
    # Disabled by default: the whole cell takes about 30 seconds to run.
    us_lands = gpd.read_file("Data/Boundaries/USA_Federal_Lands_1879006603790792540.geojson")

    # Keep only rows that have a geometry and whose geometry is valid.
    usable_geometry = us_lands['geometry'].notna() & us_lands['geometry'].is_valid
    us_lands = us_lands[usable_geometry]

    us_lands = us_lands.to_crs("EPSG:5070")

    # Narrow the nationwide layer down to the features that intersect California.
    ca_geometry = ca_state.geometry.iloc[0]
    touches_california = us_lands.intersects(ca_geometry)
    us_lands_ca = us_lands[touches_california]

In [None]:
# Remove California subbasins that lie entirely inside US government land.

# Finding so far: no HUC8 (besides some island) is fully contained in federal land.

if False:
    # Disabled by default: takes around 30 seconds.
    all_us_land = us_lands_ca.union_all()

    subbasin_us_land = []

    for idx, subbasin_geometry in zip(huc8_ca.index, huc8_ca['geometry']):
        fully_inside = bool(shapely.contains(all_us_land, subbasin_geometry))
        if fully_inside:
            # Report the index of each fully contained subbasin.
            print(idx)
        subbasin_us_land.append(fully_inside)

    huc8_ca["Government_Land"] = subbasin_us_land

    # Keep only the subbasins that are not fully contained.
    huc8_ca = huc8_ca[~huc8_ca["Government_Land"]]

355


### Grid Data (Emissions, Water Scarcity, Cost)

Emissions and Water Scarcity

In [39]:
# let's go with Siddik et al. data (2021) for now.
water_carbon = pd.read_excel(
    "Data/Footprint/Siddik/SI_XLS/Results.xlsx", sheet_name="Table 3", skiprows=1
)

# filter to only HUC rows (those with a WSF_1MW_DC value).
# .copy() gives an independent frame, so the column added below does not
# trigger pandas' SettingWithCopyWarning.
water_carbon = water_carbon[water_carbon["WSF_1MW_DC"].notna()].copy()

# turn HUC8 into an 8-character, zero-padded string.
# int() first, so a code that pandas read as a float (e.g. 18010101.0) does not
# become "18010101.0" and silently drop out of the inner merge below.
water_carbon["HUC8_str"] = water_carbon["HUC8"].map(lambda code: f"{int(code):08d}")

# merge with California data
# NOTE(review): both frames carry a "HUC8" column and the merge keys differ, so
# pandas will suffix them (HUC8_x / HUC8_y) in the result; re-running this cell
# on the merged frame would then not find "HUC8" -- confirm what downstream cells expect.
huc8_ca = huc8_ca.merge(water_carbon, how="inner", left_on="HUC8", right_on="HUC8_str")

Electricity Cost

In [54]:
# Sanity check: the rate pattern should pull the number out of a known sentence.
sample_sentence = "The average residential electricity rate in Alameda County, CA is 30 ¢/kWh"

county_regex = re.compile(
    r"The average residential electricity rate in Alameda County, CA is (\d+) ¢/kWh"
)

county_regex.findall(sample_sentence)

['30']

In [None]:
# get cost for each California county

PRICE_COLUMN = "Electricity Price (cents/kWh)"
REQUEST_TIMEOUT_SECONDS = 30
REQUEST_DELAY_SECONDS = 1  # pause between requests so the site is not hammered

# find EnergySage URL
energy_sage_ca = "https://www.energysage.com/local-data/electricity-cost/ca/"


def parse_county_rate(page_text, county_name):
    """Extract a county's average residential electricity rate from page text.

    Looks for the sentence "The average residential electricity rate in
    <county_name> County, CA is <number> ¢/kWh".

    Parameters
    ----------
    page_text : str
        Text of the county page to search.
    county_name : str
        County name without the "County" suffix, e.g. "Alameda".

    Returns
    -------
    float or None
        The rate in cents/kWh, or None when the sentence is not present.
    """
    # re.escape keeps any special characters in the county name literal.
    # The number may be an integer or a decimal.
    rate_pattern = re.compile(
        r"The average residential electricity rate in "
        + re.escape(county_name)
        + r" County, CA is (\d+(?:\.\d+)?) ¢/kWh"
    )
    found = rate_pattern.search(page_text)
    return float(found.group(1)) if found else None


# ca_counties was produced by filtering us_counties; work on an independent copy
# so adding a column does not raise pandas' SettingWithCopyWarning.
ca_counties = ca_counties.copy()

# NaN marks counties whose rate could not be scraped, so a placeholder number
# can never leak into later unit conversions.
ca_counties[PRICE_COLUMN] = float("nan")

for idx, county_name in ca_counties["NAME"].items():
    county_suffix = county_name.replace(" ", "-").lower() + "-county"
    county_energy_sage_url = energy_sage_ca + county_suffix

    try:
        county_energy_sage_html = requests.get(
            county_energy_sage_url, timeout=REQUEST_TIMEOUT_SECONDS
        )
        county_energy_sage_html.raise_for_status()
    except requests.RequestException as err:
        # Report the county and keep going; its price stays NaN.
        print(f"{county_suffix}: request failed ({err})")
    else:
        county_rate = parse_county_rate(county_energy_sage_html.text, county_name)
        if county_rate is None:
            print(f"{county_suffix}: rate sentence not found")
        else:
            ca_counties.loc[idx, PRICE_COLUMN] = county_rate

    time.sleep(REQUEST_DELAY_SECONDS)

# TODO: convert units (cents/kWh) to ($/MW)
# NOTE(review): the energy-price equivalent would be $/MWh (1 cent/kWh = 10 $/MWh) -- confirm the intended unit.

In [None]:
# EnergySage is the only source I can think of for the most accurate prices.
# https://www.energysage.com/local-data/electricity-cost/ca/ TODO webscrape
# https://www.gridstatus.io/products/api. Or, we can use this Grid Status website.


energy_sage_ca = "https://www.energysage.com/local-data/electricity-cost/ca/"

# A timeout keeps the cell from hanging indefinitely; raise_for_status stops an
# HTTP error page from being parsed as if it were the county index.
energy_sage_html = requests.get(energy_sage_ca, timeout=30)
energy_sage_html.raise_for_status()

energy_sage_soup = BeautifulSoup(energy_sage_html.text, 'html.parser')

In [30]:
# Print the href of every link on the index page that mentions "county".
# href=True restricts the search to anchors that have an href attribute;
# for the others link.get("href") is None and the `in` test would raise TypeError.
for link in energy_sage_soup.find_all("a", href=True):
    href = link["href"]
    if 'county' in href:
        print(href)

/local-data/electricity-cost/ca/alameda-county/
/local-data/electricity-cost/ca/amador-county/
/local-data/electricity-cost/ca/butte-county/
/local-data/electricity-cost/ca/calaveras-county/
/local-data/electricity-cost/ca/colusa-county/
/local-data/electricity-cost/ca/contra-costa-county/
/local-data/electricity-cost/ca/el-dorado-county/
/local-data/electricity-cost/ca/fresno-county/
/local-data/electricity-cost/ca/humboldt-county/
/local-data/electricity-cost/ca/imperial-county/
/local-data/electricity-cost/ca/inyo-county/
/local-data/electricity-cost/ca/kern-county/
/local-data/electricity-cost/ca/kings-county/
/local-data/electricity-cost/ca/lake-county/
/local-data/electricity-cost/ca/los-angeles-county/
/local-data/electricity-cost/ca/madera-county/
/local-data/electricity-cost/ca/marin-county/
/local-data/electricity-cost/ca/mariposa-county/
/local-data/electricity-cost/ca/mendocino-county/
/local-data/electricity-cost/ca/merced-county/
/local-data/electricity-cost/ca/monterey-c

### Solar + Wind Data (Emissions, Water Scarcity, Cost)