In [12]:
import pandas as pd
import datetime

Fact sheet: https://data.cnra.ca.gov/dataset/calgw-live/resource/9c58b42b-dc7d-438c-8225-3d5dbf9bc20d

The dashboard identifies the density of "susceptible" domestic wells per square mile based on recent groundwater level measurements and modeled future depth to water.

This dashboard and the underlying analysis provide a density map of domestic wells that are susceptible to going dry if recent groundwater trends continue. The map can be used to evaluate the relative density distribution within groundwater basins. However, the map should not be used to estimate the absolute number of domestic wells that are susceptible to going dry for any area or groundwater level scenario. 

Susceptibility is identified based on recent groundwater level measurements and potential **5-year water level decline**.

### Read and clean data

In [13]:
# Load the raw well-shortage vulnerability export (path is relative to the notebook's working directory).
df = pd.read_csv("../data/raw/well-shortage-vulnerability.csv")

In [15]:
# Inspect the column names exactly as they arrive in the raw CSV.
df.columns

Index(['OBJECTID', 'County_Name', 'MTRS', 'DomWellCount',
       'rRC2j_Percent_Farmed', 'rRC2g_Groundwater_Decline',
       'RC2f_Critically_Overdrafted', 'rRC2i_SWRCB_Water_Quality_Ris',
       'rRC3a_Well_Susceptibility', 'RC3a_Well_Susceptibility',
       'RC2g_Min_WSE_Change', 'RC5a_Household_Water_Outage', 'Shape__Area',
       'Date_Last_Modified'],
      dtype='object')

In [17]:
# Replace the coded source column names with readable snake_case names.
# The renamed frame is assigned back to `df` rather than using inplace=True,
# which pandas discourages. Keys that are absent are ignored by default, so
# re-running this cell on the already-renamed frame is a harmless no-op.
# NOTE(review): the 'r'-prefixed source columns look like 0-1 scores and their
# unprefixed twins like raw values — confirm against the fact sheet.
df = df.rename(columns={
    'OBJECTID': "object_id",
    'County_Name': "county",
    'MTRS': "mtrs",
    'DomWellCount': "domestic_well_count",
    'rRC2j_Percent_Farmed': "percent_farmed",
    'rRC2g_Groundwater_Decline': "groundwater_decline",
    'RC2f_Critically_Overdrafted': "critically_overdrafted",
    'rRC2i_SWRCB_Water_Quality_Ris': "water_quality",
    'rRC3a_Well_Susceptibility': "sus_well_score",
    'RC3a_Well_Susceptibility': "sus_well_adj_count",
    'RC2g_Min_WSE_Change': "min_wse_change",
    'RC5a_Household_Water_Outage': "household_water_outage",
    'Shape__Area': "area",
    'Date_Last_Modified': "date",
})

In [20]:
# The source stores the last-modified date as a number; unit="ms" tells pandas
# to read it as milliseconds when converting to datetimes.
modified_ms = df["date"]
df["date"] = pd.to_datetime(modified_ms, unit="ms")

In [21]:
# Preview the first rows to check the renamed columns and the converted date.
df.head()

Unnamed: 0,object_id,county,mtrs,domestic_well_count,percent_farmed,groundwater_decline,critically_overdrafted,water_quality,sus_well_score,sus_well_adj_count,min_wse_change,household_water_outage,area,date
0,1,Napa,BAY/DELTA,,0.0,0.0,No,,0.0,,,No,7859067.0,2022-09-29
1,2,Sonoma,BAY/DELTA,,0.0,0.0,No,,0.0,,,No,61150840.0,2022-09-29
2,3,Marin,BAY/DELTA,,0.0,0.0,No,,0.0,,,No,78267.93,2022-09-29
3,4,Santa Clara,BAY/DELTA,,0.0,0.0,No,,0.0,,,No,19829550.0,2022-09-29
4,5,Sacramento,BAY/DELTA,,0.0,0.0,No,,0.0,,,No,52116230.0,2022-09-29


In [26]:
# Save the cleaned table; index=False keeps the DataFrame's row index out of the file.
df.to_csv("../data/processed/wells/well-shortage-vulnerability-clean.csv", index=False)

### Analysis

In [29]:
# Total domestic wells across all rows (missing counts are skipped by sum)
df["domestic_well_count"].sum()

369767.0

#### Count of wells susceptible to going dry in the next 5 years if conditions persist

In [19]:
# Total susceptible wells across all rows (missing counts are skipped by sum).
# NOTE(review): the fact-sheet text at the top of this notebook cautions against
# using this data to estimate absolute numbers of susceptible wells — treat the
# total as indicative only.
df["sus_well_adj_count"].sum()

15064.0

In [30]:
# Share of domestic wells counted as susceptible.
# The result is a fraction between 0 and 1, not a percentage.
susceptible_total = df["sus_well_adj_count"].sum()
domestic_total = df["domestic_well_count"].sum()
susceptible_total / domestic_total

0.040739168178880215

In [24]:
# Sum the susceptible-well counts per county, then order counties from the
# largest total to the smallest.
county_group = (
    df.groupby("county")["sus_well_adj_count"]
    .sum()
    .reset_index(name="sus_wells")
    .sort_values(by="sus_wells", ascending=False)
)

In [25]:
# Show the first rows of the county totals (the frame is sorted with the largest totals first).
county_group.head()

Unnamed: 0,county,sus_wells
9,Fresno,2501.0
53,Tulare,1576.0
19,Madera,1374.0
51,Tehama,1098.0
48,Sonoma,1078.0


In [27]:
# Save the per-county totals; index=False keeps the row index out of the file.
county_group.to_csv("../data/processed/wells/well-shortage-vulnerability-county.csv", index=False)

#### Water quality

In [28]:
# List the distinct values present in the water-quality column (including NaN)
df["water_quality"].unique()

array([nan, 0. , 1. , 0.5])