## Setup

In [1]:
import warnings
# Silence every warning for the rest of the session so library notices do not clutter cell output.
# NOTE(review): this also hides deprecation warnings raised by later cells — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat

In [3]:
# Register the theme object exposed by altair_latimes under the name 'latimes',
# then make it the active Altair theme for every chart in this notebook.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [4]:
# Read the three input tables used by the rest of the notebook from the local ./data directory.
wells, shortage, levels = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/well_completion.csv',
        './data/shortages.csv',
        './data/spring_water_levels.csv',
    )
)

In [5]:
# Counties that the county-level cells below restrict their output to.
main_counties = [
    "Fresno",
    "Kern",
    "Kings",
    "Madera",
    "Merced",
    "San Joaquin",
    "Stanislaus",
    "Tulare",
]

## Wells drilled in 2021

In [6]:
# Count well completion reports (non-null wcr_no) for every (completion year, use) pair.
# Result columns: date, use, count.
grouped_date_use = (
    wells.groupby(['year_work_ended', 'use'])['wcr_no']
    .count()
    .reset_index(name='count')
    .rename(columns={'year_work_ended': 'date'})
)

In [7]:
# Reshape to wide form: one row per year and one column of counts per use.
grouped_date_use_pivot = (
    grouped_date_use
    .set_index(['date', 'use'])['count']
    .unstack('use')
    .reset_index()
)

In [8]:
# Show the 2021 row by selecting the year explicitly, instead of relying on it
# being the last row of the pivot (which stops being true once newer data is loaded).
grouped_date_use_pivot[grouped_date_use_pivot['date'] == 2021]

use,date,agriculture,domestic,public
97,2021.0,619.0,671.0,12.0


## Total agriculture wells

In [9]:
# Sum the agriculture column over every year in the pivot (missing year/use cells are skipped by sum()).
grouped_date_use_pivot['agriculture'].sum()

34362.0

## Total domestic wells

In [10]:
# Sum the domestic column over every year in the pivot (missing year/use cells are skipped by sum()).
grouped_date_use_pivot['domestic'].sum()

59513.0

## Well construction over time

In [11]:
# Drought periods (start/end year), drawn as shaded background bands.
drought_years = pd.DataFrame([
    {"start": 1976, "end": 1977},
    {"start": 1987, "end": 1992},
    {"start": 2001, "end": 2002},
    {"start": 2007, "end": 2009},
    {"start": 2012, "end": 2016},
])

# Year annotated with a vertical rule.
sgma_year = pd.DataFrame([
    {"year": 2014, "title": 'SGMA passed'}
])

# Hovering near a line selects its 'use'; agriculture is selected initially.
highlight = alt.selection(type='single',
                          on='mouseover',
                          fields=['use'],
                          nearest=True,
                          init={'use':'agriculture'})

# Years are stored as plain numbers (e.g. 2021.0). They are encoded as quantitative
# values because a temporal (:T) encoding reads a bare number as a millisecond
# timestamp, which would place every year a few seconds after 1970-01-01.
# The same scale/axis settings are used on every layer so the layers share one x axis.
year_scale = alt.Scale(zero=False)  # do not stretch the axis back to year 0
year_axis = alt.Axis(format='d', title='Year')  # 'd' prints 1960, not 1,960

base = alt.Chart(grouped_date_use[grouped_date_use['date'] >= 1960]).encode(
    x=alt.X('date:Q', scale=year_scale, axis=year_axis),
    y='count:Q',
    color='use:N',
    tooltip=['date', 'use', 'count']
)

# Invisible points that carry the hover selection.
points = base.mark_circle().encode(
    opacity=alt.value(0)
).add_selection(
    highlight
).properties(
    width=600,
    height=300
)

# The selected use is drawn with a thicker line.
lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3))
)

rect = alt.Chart(drought_years).mark_rect(color="#EEE").encode(
    x=alt.X('start:Q', scale=year_scale, axis=year_axis),
    x2='end',
)

# sgma_year has the columns 'year' and 'title'; the rule must read 'year'
# (the previous 'date' field does not exist in this frame).
rules = alt.Chart(sgma_year).mark_rule(color="#000").encode(
    x=alt.X('year:Q', scale=year_scale, axis=year_axis),
)

rect + points + lines + rules

## Agriculture well construction by county and year

In [12]:
# Agriculture-only well counts for each (completion year, county), with the year stored as an integer.
grouped_year_county = (
    wells[wells['use'] == 'agriculture']
    .groupby(by=['year_work_ended', 'county'])
    .size()
    .reset_index(name="count")
    .rename(columns={'year_work_ended': 'year'})
    .astype({'year': 'int'})
)

In [13]:
# Small multiples: one agriculture-drilling line chart per main county, 1980 onward.
county_trend_rows = grouped_year_county[
    grouped_year_county['county'].isin(main_counties)
    & (grouped_year_county['year'] >= 1980)
]

alt.Chart(county_trend_rows).mark_line().encode(
    x='year:O',
    y='count:Q',
).properties(width=160, height=160).facet(facet='county:N', columns=4)

## 2021 agriculture well drilling by county

In [14]:
# 2021 agriculture well counts for the main counties only.
grouped_year_county.loc[
    lambda rows: rows['county'].isin(main_counties) & (rows['year'] == 2021)
]

Unnamed: 0,year,county,count
772,2021,Fresno,179
773,2021,Kern,60
774,2021,Kings,39
775,2021,Madera,35
776,2021,Merced,32
778,2021,San Joaquin,9
779,2021,Stanislaus,17
780,2021,Tulare,243


## Percent of 2021 agriculture drilling in Tulare County

In [15]:
# Share of all 2021 agriculture wells (every county in the data, not just the main ones)
# that were drilled in Tulare County. The result is a fraction, not a percentage.
counts_2021 = grouped_year_county.loc[grouped_year_county['year'] == 2021]
drilling_2021 = counts_2021['count'].sum()
drilling_2021_tulare = counts_2021.loc[counts_2021['county'] == 'Tulare', 'count'].sum()
drilling_2021_tulare / drilling_2021

0.3925686591276252

## Wells completed since 2014

In [16]:
# Number of well reports per use among wells completed in 2014 or later.
grouped_use_2014 = (
    wells.loc[wells['year_work_ended'] >= 2014]
    .groupby(['use'])['wcr_no']
    .count()
    .reset_index(name='count')
)
grouped_use_2014

Unnamed: 0,use,count
0,agriculture,7064
1,domestic,6679
2,public,281


## Wells completed since 2020

In [17]:
# Number of well reports per use among wells completed in 2020 or later.
grouped_use_2020 = (
    wells.loc[wells['year_work_ended'] >= 2020]
    .groupby(['use'])['wcr_no']
    .count()
    .reset_index(name='count')
)
grouped_use_2020

Unnamed: 0,use,count
0,agriculture,1135
1,domestic,1514
2,public,41


## Overall water table changes from given start year

In [21]:
# Peek at one randomly chosen row; no random_state is set, so the row shown changes on every run.
levels.sample()

Unnamed: 0,site,MTRS,TownshipRange,county,year,gse_gwe
5837,351255N1193249W001,MDM-T32S-R25E-19,T32S R25E,Kern,2007,89.0


In [22]:
# A site can have several spring measurements in one year;
# collapse them to a single mean gse_gwe per site / county / year.
levels_group = (
    levels.groupby(['site', 'county', 'year'])['gse_gwe']
    .mean()
    .reset_index()
)

In [23]:
def get_level_change_overall(start_year, levels=None):
    """Yearly mean level and its change relative to ``start_year``.

    Parameters
    ----------
    start_year : int
        First year to keep; it also serves as the baseline year.
    levels : pandas.DataFrame, optional
        Frame with at least the columns ``year`` and ``gse_gwe``.
        Defaults to the notebook-level ``levels_group`` frame.

    Returns
    -------
    pandas.DataFrame
        One row per year from ``start_year`` onward with the columns
        ``year``, ``level`` (mean gse_gwe), ``count`` (number of non-null
        values averaged), ``start_level`` (the baseline year's mean) and
        ``diff_start`` (``start_level - level``).
        NOTE(review): gse_gwe presumably measures depth to water, in which
        case a negative ``diff_start`` means the water is deeper than in the
        baseline year — confirm against the data dictionary.

    Raises
    ------
    IndexError
        If there are no measurements for ``start_year`` itself.
    """
    if levels is None:
        levels = levels_group

    # Keep only the years we are interested in.
    recent = levels[levels['year'] >= start_year]
    avg_change_year = (
        recent.groupby('year')['gse_gwe']
        .agg(level='mean', count='count')
        .reset_index()
    )

    baseline = avg_change_year.loc[avg_change_year['year'] == start_year, 'level']
    if baseline.empty:
        # Same exception type the bare .iloc[0] raised, but with a usable message.
        raise IndexError(f"no measurements found for start_year={start_year}")

    avg_change_year['start_level'] = baseline.iloc[0]
    avg_change_year['diff_start'] = avg_change_year['start_level'] - avg_change_year['level']
    return avg_change_year

In [24]:
# Yearly level change measured against 1950; display the most recent year's row.
overall_level_change_1950 = get_level_change_overall(1950)
overall_level_change_1950.tail(1)

Unnamed: 0,year,level,count,start_level,diff_start
71,2021,172.283911,1398,67.813887,-104.470024


In [25]:
# Yearly level change measured against 2011; display the most recent year's row.
overall_level_change_2011 = get_level_change_overall(2011)
overall_level_change_2011.tail(1)

Unnamed: 0,year,level,count,start_level,diff_start
10,2021,172.283911,1398,128.768708,-43.515203


In [26]:
# Line chart of diff_start (baseline level minus that year's level) for each year since 1950.
alt.Chart(overall_level_change_1950).mark_line().encode(
    alt.X('year:O'),
    y='diff_start:Q',
    tooltip=['year', 'diff_start'],
)

## Dry wells from 2020-2021 water level changes

In [27]:
# Mean spring gse_gwe for every (township/range, year) pair.
range_levels = (
    levels.groupby(['TownshipRange', 'year'])['gse_gwe']
    .mean()
    .reset_index()
)

In [28]:
# Split out the township/range means for the two years being compared.
range_levels_2020, range_levels_2021 = (
    range_levels[range_levels['year'] == measurement_year]
    for measurement_year in (2020, 2021)
)
range_levels_2021.sample()

Unnamed: 0,TownshipRange,year,gse_gwe
19805,T24S R18E,2021,162.909091


In [29]:
# Attach the 2020 mean level of each township/range to every domestic well.
# The left join keeps wells with no 2020 measurement (their year/gse_gwe stay NaN);
# repeated wcr_no values are then reduced to their first occurrence.
wells_depth = (
    wells.loc[
        wells['use'] == 'domestic',
        ['wcr_no', 'TownshipRange', 'completed_depth_updated', 'year_work_ended', 'county'],
    ]
    .merge(range_levels_2020, on=["TownshipRange"], how="left")
    .drop_duplicates(subset="wcr_no")
)
wells_depth.sample()

Unnamed: 0,wcr_no,TownshipRange,completed_depth_updated,year_work_ended,county,year,gse_gwe
52550,WCR0230720,T03S R08E,265.0,1988.0,Stanislaus,2020.0,23.68


In [30]:
# Label every well against the 2020 level, allowing 20 feet for pump placement:
#   unknown - completed depth or 2020 level is missing
#   dry     - completed depth minus 20 is at or below the 2020 gse_gwe value
#   not_dry - everything else
missing_2020_input = wells_depth['completed_depth_updated'].isna() | wells_depth['gse_gwe'].isna()
dry_in_2020 = wells_depth['completed_depth_updated'] - 20 <= wells_depth['gse_gwe']

wells_depth['condition'] = 'not_dry'
wells_depth.loc[missing_2020_input, 'condition'] = 'unknown'
wells_depth.loc[dry_in_2020, 'condition'] = 'dry'

In [31]:
# Keep only the wells labelled 'not_dry' against the 2020 levels.
not_dry_2020 = wells_depth[wells_depth['condition'].eq('not_dry')]
not_dry_2020.sample()

Unnamed: 0,wcr_no,TownshipRange,completed_depth_updated,year_work_ended,county,year,gse_gwe,condition
36990,WCR1990-010905,T05N R06E,225.0,1990.0,Sacramento,2020.0,79.525,not_dry


In [32]:
# Number of 'not_dry' wells in each of the main counties.
(
    not_dry_2020.loc[not_dry_2020['county'].isin(main_counties)]
    .groupby('county')
    .size()
    .reset_index(name='count')
)

Unnamed: 0,county,count
0,Fresno,12358
1,Kern,1445
2,Kings,1938
3,Madera,2346
4,Merced,5897
5,San Joaquin,8122
6,Stanislaus,7546
7,Tulare,5365


In [33]:
# Attach the 2021 township/range level to the wells that were not dry in 2020.
# After the merge the 2020 columns carry the _x suffix and the 2021 columns the _y suffix;
# the levels are renamed and the two year columns dropped.
dry_wells_2021 = (
    not_dry_2020
    .merge(range_levels_2021, on=["TownshipRange"], how="left")
    .drop_duplicates(subset="wcr_no")
    .rename(columns={
        "gse_gwe_x": "level_2020",
        "gse_gwe_y": "level_2021",
        "condition": "condition_2020",
    })
    .drop(columns=['year_x', 'year_y'])
)
dry_wells_2021.sample()

Unnamed: 0,wcr_no,TownshipRange,completed_depth_updated,year_work_ended,county,level_2020,condition_2020,level_2021
39167,WCR0025056,T05S R10E,265.0,,Stanislaus,37.06,not_dry,34.35


In [34]:
# Label every remaining well against the 2021 level, allowing 20 feet for pump placement:
#   unknown - completed depth or 2021 level is missing
#   dry     - completed depth minus 20 is at or below the 2021 level
#   not_dry - everything else
missing_2021_input = dry_wells_2021['completed_depth_updated'].isna() | dry_wells_2021['level_2021'].isna()
dry_in_2021 = dry_wells_2021['completed_depth_updated'] - 20 <= dry_wells_2021['level_2021']

dry_wells_2021['condition_2021'] = 'not_dry'
dry_wells_2021.loc[missing_2021_input, 'condition_2021'] = 'unknown'
dry_wells_2021.loc[dry_in_2021, 'condition_2021'] = 'dry'
dry_wells_2021.sample()

Unnamed: 0,wcr_no,TownshipRange,completed_depth_updated,year_work_ended,county,level_2020,condition_2020,level_2021,condition_2021
36582,WCR0257563,T03S R09E,120.0,2005.0,Stanislaus,59.05,not_dry,57.05,not_dry


In [35]:
# Count wells per 2021 condition, once by township/range and once by county.
dry_wells_2021_range, dry_wells_2021_county = (
    dry_wells_2021.groupby([area_column, 'condition_2021']).size().reset_index(name='count')
    for area_column in ('TownshipRange', 'county')
)

In [36]:
# Wide table of 2021 well counts: one row per township/range, one column per condition,
# with 0 where a township/range has no wells in that condition.
# It is rebuilt from dry_wells_2021 so that re-running the cell gives the same result:
# pivoting dry_wells_2021_range onto itself fails on a second run, because after the
# first pivot 'condition_2021' is no longer a column.
dry_wells_2021_range = (
    dry_wells_2021.groupby(['TownshipRange', 'condition_2021'])
    .size()
    .reset_index(name='count')
    .pivot(index=['TownshipRange'], columns='condition_2021', values='count')
    .reset_index()
    .fillna(0)
)
dry_wells_2021_range.head()

condition_2021,TownshipRange,dry,not_dry,unknown
0,T01N R02E,5.0,204.0,0.0
1,T01N R03E,2.0,559.0,0.0
2,T01N R06E,6.0,140.0,0.0
3,T01N R07E,0.0,308.0,0.0
4,T01N R08E,0.0,205.0,0.0


In [37]:
# Total number of wells labelled 'dry' against the 2021 levels, summed over all township/ranges.
dry_wells_2021_range['dry'].sum()

1010.0

## Household water shortages by year

In [38]:
# Number of shortage records per creation year (all counties), with the year stored as an integer.
shortage_by_year = (
    shortage.groupby(by=['CreateDateYear'])
    .size()
    .reset_index(name='Count')
    .rename(columns={'CreateDateYear': 'Year'})
    .astype({'Year': 'int'})
)

In [39]:
# Display the full year-by-year table (one row per year present in the data).
shortage_by_year

Unnamed: 0,Year,Count
0,2012,1
1,2014,166
2,2015,297
3,2016,1334
4,2017,168
5,2018,30
6,2019,29
7,2020,29
8,2021,465


In [40]:
# Relative change in shortage records from 2020 to 2021, as a fraction (multiply by 100 for percent).
# Both years are looked up by value, so the result does not depend on them being
# the last two rows of the table (years can be missing or appended).
count_by_year = shortage_by_year.set_index('Year')['Count']
(count_by_year.loc[2021] - count_by_year.loc[2020]) / count_by_year.loc[2020]

15.03448275862069

## Shortages by year and county

In [41]:
# Number of shortage records per (creation year, county), main counties only.
shortage_by_year_county = (
    shortage.loc[shortage['County'].isin(main_counties)]
    .groupby(by=['CreateDateYear', 'County'])
    .size()
    .reset_index(name='Count')
    .rename(columns={'CreateDateYear': 'Year'})
    .astype({'Year': 'int'})
)

In [42]:
# 2021 shortage counts for each of the main counties.
shortage_by_year_county[shortage_by_year_county['Year']==2021]

Unnamed: 0,Year,County,Count
42,2021,Fresno,113
43,2021,Kern,3
44,2021,Kings,15
45,2021,Madera,127
46,2021,Merced,44
47,2021,San Joaquin,19
48,2021,Stanislaus,9
49,2021,Tulare,133


In [43]:
# Fraction of all 2021 shortage records (every county) that came from Tulare County.
shortages_2021 = shortage_by_year.loc[shortage_by_year['Year'] == 2021, 'Count'].iloc[0]
tulare_2021_rows = (
    (shortage_by_year_county['Year'] == 2021)
    & (shortage_by_year_county['County'] == 'Tulare')
)
shortages_2021_Tulare = shortage_by_year_county.loc[tulare_2021_rows, 'Count'].iloc[0]
shortages_2021_Tulare / shortages_2021

0.2860215053763441