In [189]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [190]:
# Make the LA Times theme known to Altair, then switch it on for the charts below.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

### Import

District-level water-use data, downloaded from the portal.

In [191]:
# District-level time series; reporting_month is parsed into datetimes on load.
df = pd.read_csv(
    "../data/raw/water-use-by-district-timeseries.csv",
    parse_dates=["reporting_month"],
)

Regional residential-usage data, scraped from the dashboard.

In [192]:
# Regional residential usage; reporting_month is parsed into datetimes on load.
regional_usage = pd.read_csv(
    "../data/processed/regional-residential-usage.csv",
    parse_dates=["reporting_month"],
)

In [193]:
# Display the regional table to inspect its columns and row count.
regional_usage

Unnamed: 0,hydrologic_region,reporting_month,agencies_reporting,pop_weighted_rgpcd
0,Central Coast,2014-06-15,30,100
1,Colorado River,2014-06-15,13,222
2,North Coast,2014-06-15,13,88
3,North Lahontan,2014-06-15,5,162
4,Sacramento River,2014-06-15,40,187
...,...,...,...,...
935,San Francisco Bay,2022-03-15,43,59
936,San Joaquin River,2022-03-15,22,93
937,South Coast,2022-03-15,164,76
938,South Lahontan,2022-03-15,16,90


---
### Clean and analyze

In [194]:
# List the district table's columns; the next cell picks the subset to keep.
df.columns

Index(['supplier_name', 'public_water_system_id', 'reporting_month', 'county',
       'hydrologic_region', 'climate_zone', 'total_population_served',
       'reference_2014_population', 'county_under_drought_declaration',
       'water_shortage_contingency_stage_invoked',
       'water_shortage_level_indicator',
       'dwr_state_standard_level_corresponding_to_stage',
       'water_production_units',
       'reported_preliminary_total_potable_water_production',
       'reported_final_total_potable_water_production',
       'preliminary_percent_residential_use', 'final_percent_residential_use',
       'reported_preliminary_commercial_agricultural_water',
       'reported_final_commercial_agricultural_water',
       'reported_preliminary_commercial_industrial_and_institutional_water',
       'reported_final_commercial_industrial_and_institutional_water',
       'reported_recycled_water', 'reported_non_revenue_water',
       'calculated_total_potable_water_production_gallons_ag_excluded'

In [195]:
# Columns carried forward from the district table. The drought/shortage
# columns (county_under_drought_declaration,
# water_shortage_contingency_stage_invoked, water_shortage_level_indicator)
# are currently left out.
keeps = [
    "supplier_name",
    "reporting_month",
    "county",
    "hydrologic_region",
    "total_population_served",
    "dwr_state_standard_level_corresponding_to_stage",
    "calculated_r_gpcd",
]

In [196]:
# Narrow the district table to just the columns listed in `keeps`.
trim_df = df.loc[:, keeps]

In [197]:
# Attach the regional `pop_weighted_rgpcd` value to every district row that
# shares the same hydrologic region and reporting month. how="left" keeps
# district rows with no regional match (their pop_weighted_rgpcd becomes NaN).
# validate="many_to_one" makes pandas raise MergeError if the regional table
# ever contains a duplicated (hydrologic_region, reporting_month) key, which
# would otherwise silently duplicate district rows in the exported CSV.
regional_cols = ["hydrologic_region", "reporting_month", "pop_weighted_rgpcd"]
merge_df = pd.merge(
    trim_df,
    regional_usage[regional_cols],
    how="left",
    on=["hydrologic_region", "reporting_month"],
    validate="many_to_one",
)

In [198]:
# Long-format view: the district and regional measures are stacked into the
# default `variable` / `value` columns, keyed by supplier, region and month.
# NOTE(review): `melt` is not referenced by the cells visible below — confirm it is still needed.
melt = merge_df.melt(
    id_vars=["supplier_name", "hydrologic_region", "reporting_month"],
    value_vars=["calculated_r_gpcd", "pop_weighted_rgpcd"],
)

In [199]:
# LADWP's monthly residential use (bars) against the regional
# `pop_weighted_rgpcd` value (stepped line), plus a dashed reference rule.
LADWP = "Los Angeles Department of Water and Power"
CHART_START = "2021-04-01"  # only months strictly after this date are drawn
GOAL_RGPCD = 80  # NOTE(review): the source of this target value is not shown here — confirm
RGPCD_AXIS_TITLE = "Residential use (R-GPCD)"

# Filter once into a named frame so the charted subset can be inspected directly.
ladwp_recent = merge_df[
    (merge_df.supplier_name == LADWP) & (merge_df.reporting_month > CHART_START)
]

base = alt.Chart(ladwp_recent).encode(
    x=alt.X("yearmonth(reporting_month):O", title="Reporting month"),
)

bar = base.mark_bar(color="#83c6e0").encode(
    y=alt.Y("calculated_r_gpcd:Q", stack=None, title=RGPCD_AXIS_TITLE)
)

avg_line = base.mark_line(interpolate="step", color="#1281aa").encode(
    y=alt.Y("pop_weighted_rgpcd:Q", title=RGPCD_AXIS_TITLE)
)

# Every layer carries the same y-axis title so the layered chart shows one
# readable label rather than a merged list of raw field names.
goal_line = (
    alt.Chart(pd.DataFrame({"y": [GOAL_RGPCD]}))
    .mark_rule(color="#b75a36", strokeDash=[10, 11])
    .encode(y=alt.Y("y:Q", title=RGPCD_AXIS_TITLE))
)

(bar + avg_line + goal_line).properties(
    title="LADWP residential water usage compared to regional average",
    width=600,
)

In [200]:
# Display the full merged frame. To look only at the charted rows, filter with
# (merge_df.supplier_name == "Los Angeles Department of Water and Power") & (merge_df.reporting_month > "2021-04-01").
merge_df

Unnamed: 0,supplier_name,reporting_month,county,hydrologic_region,total_population_served,dwr_state_standard_level_corresponding_to_stage,calculated_r_gpcd,pop_weighted_rgpcd
0,East Bay Municipal Utilities District,2022-03-15,"Alameda,Contra Costa",San Francisco Bay,1430000.0,5.0,58.6280,59
1,East Bay Municipal Utilities District,2022-02-15,"Alameda,Contra Costa",San Francisco Bay,1430000.0,,52.5704,55
2,East Bay Municipal Utilities District,2022-01-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,50.1604,48
3,East Bay Municipal Utilities District,2021-12-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,51.1677,48
4,East Bay Municipal Utilities District,2021-11-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,54.0265,50
...,...,...,...,...,...,...,...,...
37103,Thermalito Water and Sewer District,2021-05-15,Butte,Sacramento River,10312.0,,228.7530,159
37104,Thermalito Water and Sewer District,2021-04-15,Butte,Sacramento River,10312.0,,148.1860,123
37105,Thermalito Water and Sewer District,2021-03-15,Butte,Sacramento River,10312.0,,89.5637,63
37106,Thermalito Water and Sewer District,2021-02-15,Butte,Sacramento River,10312.0,,86.9818,72


In [201]:
# Order rows by reporting month first, then by supplier name within each month.
sort_df = merge_df.sort_values(by=["reporting_month", "supplier_name"])

In [202]:
# Write the sorted table to the processed-data folder, omitting the index column.
sort_df.to_csv(
    "../data/processed/district-level-residential-use.csv",
    index=False,
)