In [2]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [3]:
# Register the theme object exposed by altair_latimes under the name "latimes",
# then make it the active Altair theme for the charts that follow.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

### Import

In [15]:
# Read the raw usage CSV; the "Reporting Month" column is parsed into datetimes on load.
df = pd.read_csv(
    "../data/raw/uw-usage.csv",
    parse_dates=["Reporting Month"],
)

### Clean

In [20]:
# Normalise the column labels: spaces and hyphens become underscores, every
# character matched by the bracketed class (#, @, &, parentheses and commas)
# is removed, and the result is lower-cased.
df.columns = (
    df.columns
    .str.replace(" ","_")
    .str.replace("-","_")
    .str.replace(r'[#,@,&,(,)]', '', regex=True)
    .str.lower()
)

Trim this down to just the columns we need.

In [21]:
# Show the cleaned column labels so the names used in `keeps` below can be checked.
df.columns

Index(['supplier_name', 'public_water_system_id', 'reporting_month', 'county',
       'hydrologic_region', 'climate_zone', 'total_population_served',
       'reference_2014_population', 'county_under_drought_declaration',
       'water_shortage_contingency_stage_invoked',
       'water_shortage_level_indicator',
       'dwr_state_standard_level_corresponding_to_stage',
       'water_production_units',
       'reported_preliminary_total_potable_water_production',
       'reported_final_total_potable_water_production',
       'preliminary_percent_residential_use', 'final_percent_residential_use',
       'reported_preliminary_commercial_agricultural_water',
       'reported_final_commercial_agricultural_water',
       'reported_preliminary_commercial_industrial_and_institutional_water',
       'reported_final_commercial_industrial_and_institutional_water',
       'reported_recycled_water', 'reported_non_revenue_water',
       'calculated_total_potable_water_production_gallons_ag_excluded'

In [22]:
# Columns carried into the trimmed frame.
# Left out (they were commented out of this list): the drought-declaration and
# shortage-stage fields, the commercial agricultural and commercial/industrial/
# institutional water fields, recycled and non-revenue water, the 2013
# calculated fields, the calculated commercial agricultural gallons, and
# `qualification`.
keeps = [
    # supplier identity, reporting period and geography
    "supplier_name",
    "public_water_system_id",
    "reporting_month",
    "county",
    "hydrologic_region",
    "climate_zone",
    "total_population_served",
    # reported production and residential share
    "water_production_units",
    "reported_preliminary_total_potable_water_production",
    "reported_final_total_potable_water_production",
    "preliminary_percent_residential_use",
    "final_percent_residential_use",
    # calculated fields
    "calculated_total_potable_water_production_gallons_ag_excluded",
    "calculated_r_gpcd",
]

In [24]:
# Keep every row, but only the columns named in `keeps`.
trim_df = df.loc[:, keeps]

In [27]:
# Preview the first rows of the trimmed frame.
trim_df.head()

Unnamed: 0,supplier_name,public_water_system_id,reporting_month,county,hydrologic_region,climate_zone,total_population_served,water_production_units,reported_preliminary_total_potable_water_production,reported_final_total_potable_water_production,preliminary_percent_residential_use,final_percent_residential_use,calculated_total_potable_water_production_gallons_ag_excluded,calculated_r_gpcd
0,East Bay Municipal Utilities District,CA0110005,2022-03-15,"Alameda,Contra Costa",San Francisco Bay,3,1430000,MG,4481.0,,58.0,,4481000000.0,58.628
1,East Bay Municipal Utilities District,CA0110005,2022-02-15,"Alameda,Contra Costa",San Francisco Bay,3,1430000,MG,3898.0,,54.0,,3898000000.0,52.5704
2,East Bay Municipal Utilities District,CA0110005,2022-01-15,"Alameda,Contra Costa",San Francisco Bay,3,1420000,MG,3807.0,,58.0,,3807000000.0,50.1604
3,East Bay Municipal Utilities District,CA0110005,2021-12-15,"Alameda,Contra Costa",San Francisco Bay,3,1420000,MG,3754.0,,60.0,,3754000000.0,51.1677
4,East Bay Municipal Utilities District,CA0110005,2021-11-15,"Alameda,Contra Costa",San Francisco Bay,3,1420000,MG,3773.0,,61.0,,3773000000.0,54.0265


Calculate the population-weighted R-GPCD (residential gallons per capita per day) for each hydrologic region and reporting month.

In [35]:
def regional_calcs(df, gals, rgpcd, pop):
    """Summarise one group of rows into totals and a weighted average.

    Parameters
    ----------
    df : pandas.DataFrame
        The rows of a single group (as handed over by ``groupby(...).apply``).
    gals : str
        Label of the column summed into ``total_gallons``.
    rgpcd : str
        Label of the column whose values are averaged.
    pop : str
        Label of the column used as the averaging weight and summed into
        ``total_pop``.

    Returns
    -------
    pandas.Series
        Indexed by ``'total_pop'``, ``'total_gallons'`` and
        ``'pop_weighted_average'``. The average is NaN when no row has both a
        value and a weight, or when the usable weights sum to zero.
    """
    values = df[rgpcd]
    weights = df[pop]

    # Only rows that have BOTH a value and a weight may enter the average.
    # Previously a row with a missing value was dropped from the numerator
    # (NaN is skipped by .sum()) while its weight still counted in the
    # denominator, which pulled the weighted average down.
    usable = values.notna() & weights.notna()
    weight_total = weights[usable].sum()
    if weight_total:
        wt_avg = (values[usable] * weights[usable]).sum() / weight_total
    else:
        wt_avg = float("nan")

    # The totals still cover every row in the group, as before.
    return pd.Series(
        [weights.sum(), df[gals].sum(), wt_avg],
        index=['total_pop', 'total_gallons', 'pop_weighted_average'],
    )

In [37]:
# One summary row per (reporting month, hydrologic region): the column labels
# are handed to `regional_calcs` by keyword instead of by position.
region_df = (
    trim_df
    .groupby(['reporting_month','hydrologic_region'])
    .apply(
        regional_calcs,
        gals="calculated_total_potable_water_production_gallons_ag_excluded",
        rgpcd='calculated_r_gpcd',
        pop='total_population_served',
    )
    .reset_index()
)

In [40]:
# Display the per-month, per-region summary produced by the groupby above.
region_df

Unnamed: 0,reporting_month,hydrologic_region,total_pop,total_gallons,pop_weighted_average
0,2014-06-15,Central Coast,1222138.0,5.604272e+09,99.958677
1,2014-06-15,Colorado River,753033.0,7.206261e+09,221.820762
2,2014-06-15,North Coast,372282.0,1.545991e+09,88.473368
3,2014-06-15,North Lahontan,92882.0,6.701514e+08,161.949192
4,2014-06-15,Sacramento River,2611811.0,2.190972e+10,186.962350
...,...,...,...,...,...
935,2022-03-15,San Francisco Bay,6569288.0,1.858536e+10,58.590408
936,2022-03-15,San Joaquin River,1536399.0,6.692555e+09,92.508034
937,2022-03-15,South Coast,19795946.0,7.224709e+10,76.396130
938,2022-03-15,South Lahontan,837117.0,3.205327e+09,90.109902


In [42]:
# Same summary as the regional one, but grouped by reporting month only, so
# each row covers every supplier reporting in that month.
statewide_df = (
    trim_df
    .groupby(['reporting_month'])
    .apply(
        regional_calcs,
        gals="calculated_total_potable_water_production_gallons_ag_excluded",
        rgpcd='calculated_r_gpcd',
        pop='total_population_served',
    )
    .reset_index()
)

In [288]:
# NOTE(review): disabled days-per-month lookup; nothing in the visible notebook
# reads it, and February is fixed at 28 days. Delete it or revive it with
# calendar-aware logic rather than leaving a cell of commented-out code.
# days_per_month = {
#     "1": 31,
#     "2": 28,
#     "3": 31,
#     "4": 30,
#     "5": 31,
#     "6": 30,
#     "7": 31,
#     "8": 31,
#     "9": 30,
#     "10": 31,
#     "11": 30,
#     "12": 31
# }

In [195]:
# Narrower column set for the supplier-versus-region comparison.
# This rebinds `keeps`, so the earlier, wider list is no longer available
# after this cell runs.
# Left out (they were commented out of this list):
# county_under_drought_declaration, water_shortage_contingency_stage_invoked,
# water_shortage_level_indicator.
keeps = [
    "supplier_name",
    "reporting_month",
    "county",
    "hydrologic_region",
    "total_population_served",
    "dwr_state_standard_level_corresponding_to_stage",
    "calculated_r_gpcd",
]

In [196]:
# Re-derive the trimmed frame from `df` using the narrower `keeps` list;
# this replaces the earlier `trim_df`.
trim_df = df.loc[:, keeps]

In [197]:
# `regional_usage` was never created anywhere in the visible notebook, so this
# cell raised NameError on a fresh kernel. Rebuild it here from `region_df`.
# NOTE(review): the rename and the rounding to whole numbers are inferred from
# the saved outputs (58.590408 in the regional table appears as 59 in the
# merged table) -- confirm against the original definition if it still exists.
regional_usage = region_df.rename(
    columns={"pop_weighted_average": "pop_weighted_rgpcd"}
)
# Nullable integers keep whole-number output even if a region has no usable average.
regional_usage["pop_weighted_rgpcd"] = (
    regional_usage["pop_weighted_rgpcd"].round().astype("Int64")
)

# Attach each supplier row's regional average for the same month.
# `validate` makes the merge fail loudly if a (region, month) pair is ever
# duplicated on the right, which would otherwise silently multiply supplier rows.
merge_df = pd.merge(
    trim_df,
    regional_usage[["hydrologic_region","reporting_month","pop_weighted_rgpcd"]],
    how="left",
    on=["hydrologic_region","reporting_month"],
    validate="many_to_one",
)

In [198]:
# Long-format version of the merged frame: one row per supplier, region,
# month and measure, with the two r-gpcd columns stacked into variable/value.
melt = merge_df.melt(
    id_vars=["supplier_name","hydrologic_region","reporting_month"],
    value_vars=["calculated_r_gpcd","pop_weighted_rgpcd"],
)

In [199]:
# Rows for the single supplier being charted, restricted to reporting months
# after 2021-04-01.
is_ladwp = merge_df.supplier_name == "Los Angeles Department of Water and Power"
is_recent = merge_df.reporting_month > "2021-04-01"

# Shared x encoding: reporting month as an ordinal year-month axis.
base = alt.Chart(merge_df[is_ladwp & is_recent]).encode(
    x=alt.X("yearmonth(reporting_month):O"),
)

# Bars: the supplier's own calculated_r_gpcd.
bar = base.mark_bar(color="#83c6e0").encode(
    y=alt.Y("calculated_r_gpcd", stack=None),
)

# Stepped line: the regional pop_weighted_rgpcd for the same months.
avg_line = base.mark_line(interpolate='step', color='#1281aa').encode(
    y=alt.Y("pop_weighted_rgpcd"),
)

# Dashed horizontal rule drawn at y = 80.
goal_source = pd.DataFrame({'y': [80]})
goal_line = (
    alt.Chart(goal_source)
    .mark_rule(color="#b75a36", strokeDash=[10,11])
    .encode(y='y')
)

layered = bar + avg_line + goal_line
layered.properties(
    title="LADWP residential water usage compared to regional average",
    width=600,
)

In [200]:
# Display the full merged frame (no supplier or date filter applied).
merge_df

Unnamed: 0,supplier_name,reporting_month,county,hydrologic_region,total_population_served,dwr_state_standard_level_corresponding_to_stage,calculated_r_gpcd,pop_weighted_rgpcd
0,East Bay Municipal Utilities District,2022-03-15,"Alameda,Contra Costa",San Francisco Bay,1430000.0,5.0,58.6280,59
1,East Bay Municipal Utilities District,2022-02-15,"Alameda,Contra Costa",San Francisco Bay,1430000.0,,52.5704,55
2,East Bay Municipal Utilities District,2022-01-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,50.1604,48
3,East Bay Municipal Utilities District,2021-12-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,51.1677,48
4,East Bay Municipal Utilities District,2021-11-15,"Alameda,Contra Costa",San Francisco Bay,1420000.0,,54.0265,50
...,...,...,...,...,...,...,...,...
37103,Thermalito Water and Sewer District,2021-05-15,Butte,Sacramento River,10312.0,,228.7530,159
37104,Thermalito Water and Sewer District,2021-04-15,Butte,Sacramento River,10312.0,,148.1860,123
37105,Thermalito Water and Sewer District,2021-03-15,Butte,Sacramento River,10312.0,,89.5637,63
37106,Thermalito Water and Sewer District,2021-02-15,Butte,Sacramento River,10312.0,,86.9818,72


In [201]:
# Order rows by reporting month, then by supplier name within each month.
sort_df = merge_df.sort_values(by=["reporting_month","supplier_name"])

In [202]:
# Write the sorted frame to the processed-data folder, without the index column.
sort_df.to_csv(
    "../data/processed/district-level-residential-use.csv",
    index=False,
)