In [428]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [429]:
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [430]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

### Import

In [431]:
df = pd.read_csv("../data/processed/district-level-residential-use.csv", parse_dates=["reporting_month"])

In [432]:
baselines_df = pd.read_csv("../data/raw/uw-2020-baseline-values.csv")

### Clean

Clean up baselines column names as we did in the previous notebook

In [433]:
baselines_df.columns = baselines_df.columns.str.replace(" ","_").str.replace("-","_").str.replace(r'[#,@,&,(,)]', '', regex=True).str.lower()

In [434]:
baselines_df.supplier_name = baselines_df.supplier_name.str.replace("  ", " ")

In [435]:
baselines_trimmed_df = baselines_df[[
    'supplier_name', 
    'public_water_system_id', 
    'month', 
    #'original_units',
    # 'total_potable_production_original_units',
    # 'potable_commercial_agriculture_original_units',
    'total_potable_production_minus_ag_gallons', 
    # 'staff_notes'
]].copy()

Get rid of some unnecessary columns

In [436]:
trim_df = df[[
    'supplier_name', 
    'public_water_system_id', 
    'reporting_month', 
    'county',
    'hydrologic_region', 
    # 'climate_zone', 
    'total_population_served',
    'calculated_total_potable_water_production_gallons_ag_excluded',
    # 'calculated_r_gpcd', 
    # 'pop_weighted_rgpcd', 
    # 'month'
]].copy()

### Merge

Filter df to July 2021 and later

In [437]:
filter_df = trim_df[trim_df.reporting_month >= "2021-06-15"].copy()

Make a month column to merge with baselines

In [438]:
filter_df["month"] = filter_df["reporting_month"].dt.month

In [439]:
merge_df = pd.merge(
    filter_df, 
    baselines_trimmed_df, 
    how="left", 
    on=["supplier_name", "month"]
).rename(
    columns={
        "calculated_total_potable_water_production_gallons_ag_excluded": "total_gallons_current",
        "total_potable_production_minus_ag_gallons": "total_gallons_baseline"
    }
).drop("month", axis=1)

### Calculate percent changes by month

In [440]:
def pct_change(new, old):
    return (new - old) / old

By district

In [441]:
merge_df["gallons_pct_change"] = merge_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

By region

In [472]:
regions_df = merge_df.groupby(
    ["hydrologic_region", "reporting_month"]
)[["total_gallons_current","total_gallons_baseline", "supplier_name"]].agg({
    "total_gallons_current":"sum",
    "total_gallons_baseline":"sum",
    "supplier_name":"size"
}).reset_index()

In [473]:
regions_df["gallons_pct_change"] = regions_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [478]:
regions_df[
    #(regions_df.hydrologic_region == "South Coast")&
    (regions_df.reporting_month=="2022-03-15")
          ]

Unnamed: 0,hydrologic_region,reporting_month,total_gallons_current,total_gallons_baseline,supplier_name,gallons_pct_change
9,Central Coast,2022-03-15,3702708563.28,3082968194.161,27,0.201
19,Colorado River,2022-03-15,4012746254.412,3161110124.731,10,0.269
29,North Coast,2022-03-15,984166724.022,1034517111.042,15,-0.049
39,North Lahontan,2022-03-15,237376917.0,235814594.0,5,0.007
49,Sacramento River,2022-03-15,12236328994.521,10659783409.406,42,0.148
59,San Francisco Bay,2022-03-15,18585356163.922,18124267090.266,43,0.025
69,San Joaquin River,2022-03-15,6692555159.785,5897621800.013,22,0.135
79,South Coast,2022-03-15,72247093556.118,56899450538.589,164,0.27
89,South Lahontan,2022-03-15,3205327242.866,2598100356.281,16,0.234
99,Tulare Lake,2022-03-15,8820643375.803,8237437664.424,29,0.071


In [445]:
merge_df[(merge_df.hydrologic_region=="South Coast")&(merge_df.reporting_month=="2022-03-15")].sort_values("total_gallons_baseline")

Unnamed: 0,supplier_name,public_water_system_id_x,reporting_month,county,hydrologic_region,total_population_served,total_gallons_current,public_water_system_id_y,total_gallons_baseline,gallons_pct_change
3678,Big Bear City Community Services District,CA3610008,2022-03-15,San Bernardino,South Coast,12738,22473943.470,CA3610008,18599599.453,0.208
3934,San Bernardino County Service Area 70J,CA3610125,2022-03-15,San Bernardino,South Coast,12591,37994226.600,CA3610125,27250954.840,0.394
3928,Rubio Canyon Land and Water Association,CA1910140,2022-03-15,Los Angeles,South Coast,9600,50291843.340,CA1910140,31930181.332,0.575
3992,Trabuco Canyon Water District,CA3010094,2022-03-15,Orange,South Coast,12921,54775553.100,CA3010094,37440328.962,0.463
3994,Triunfo Sanitation District / Oak Park Water S...,CA5610043,2022-03-15,Ventura,South Coast,12200,51158607.000,CA5610043,37798765.532,0.353
...,...,...,...,...,...,...,...,...,...,...
3841,Long Beach City of,CA1910065,2022-03-15,Los Angeles,South Coast,467730,1330123782.000,CA1910065,1209560497.024,0.100
3817,Irvine Ranch Water District,CA3010092,2022-03-15,Orange,South Coast,462317,1622314373.700,CA3010092,1278970109.489,0.268
3756,Eastern Municipal Water District,CA3310009,2022-03-15,Riverside,South Coast,633662,2092289271.000,CA3310009,1474803558.602,0.419
3938,San Diego City of,CA3710020,2022-03-15,San Diego,South Coast,1411034,4340693756.100,CA3710020,3465527681.573,0.253


In [446]:
merge_df[(merge_df.hydrologic_region=="South Coast")&(merge_df.reporting_month=="2022-02-15")].sort_values("total_gallons_baseline")

Unnamed: 0,supplier_name,public_water_system_id_x,reporting_month,county,hydrologic_region,total_population_served,total_gallons_current,public_water_system_id_y,total_gallons_baseline,gallons_pct_change
3287,Big Bear City Community Services District,CA3610008,2022-02-15,San Bernardino,South Coast,12738,19440270.660,CA3610008,19019947.794,0.022
3561,San Bernardino County Service Area 70J,CA3610125,2022-02-15,San Bernardino,South Coast,12591,29981550.510,CA3610125,28681442.605,0.045
3521,"Perris, City of","CA3310082,CA3310029",2022-02-15,Riverside,South Coast,9000,36546144.756,"CA3310082,CA3310029",39558311.400,-0.076
3617,Trabuco Canyon Water District,CA3010094,2022-02-15,Orange,South Coast,12921,40047087.900,CA3010094,39851629.522,0.005
3509,Orchard Dale Water District,CA1910101,2022-02-15,Los Angeles,South Coast,25000,40372938.900,CA1910101,41653587.913,-0.031
...,...,...,...,...,...,...,...,...,...,...
3434,Irvine Ranch Water District,CA3010092,2022-02-15,Orange,South Coast,434165,1313671565.010,CA3010092,1339830883.290,-0.020
3548,Riverside City of,CA3310031,2022-02-15,Riverside,South Coast,307138,1416148446.000,CA3310031,1373463764.805,0.031
3370,Eastern Municipal Water District,CA3310009,2022-02-15,Riverside,South Coast,632652,1615569258.000,CA3310009,1530524152.619,0.056
3565,San Diego City of,CA3710020,2022-02-15,San Diego,South Coast,1411034,3867330008.400,CA3710020,3697664238.168,0.046


Statewide

In [447]:
statewide_df = merge_df.groupby(["hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [448]:
statewide_df["gallons_pct_change"] = statewide_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

### Calculate cumulative savings since July 2021

Drop suppliers with missing data

In [449]:
counts = merge_df.groupby("supplier_name")["reporting_month"].count().reset_index()

In [450]:
max_count = max(counts.reporting_month)

In [451]:
suppliers_with_complete_data = list(counts[counts.reporting_month == max_count].supplier_name)

In [452]:
complete_data = merge_df[merge_df.supplier_name.isin(suppliers_with_complete_data)].copy()

By district

In [453]:
district_cumulative_savings_df = complete_data.groupby(["supplier_name","hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [454]:
district_cumulative_savings_df["cumulative_pct_change"] = complete_data.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [455]:
district_cumulative_savings_df

Unnamed: 0,supplier_name,hydrologic_region,total_gallons_current,total_gallons_baseline,cumulative_pct_change
0,Adelanto City of,South Lahontan,1223905830.448,1263786920.057,-0.110
1,Alameda County Water District,San Francisco Bay,11331000000.000,12150200000.000,-0.031
2,Alco Water Service,Central Coast,1083142000.000,1114537000.000,-0.066
3,Alhambra City of,South Coast,2520769577.593,2583428317.598,0.066
4,Amador Water Agency,San Joaquin River,1029370000.000,1061830000.000,0.079
...,...,...,...,...,...
347,Westminster City of,South Coast,2946898688.700,2957069114.882,-0.122
348,"Windsor, Town of",North Coast,798709352.799,1029971067.399,-0.011
349,Woodland City of,Sacramento River,2672098974.000,2836923315.000,-0.020
350,Yorba Linda Water District,South Coast,5329889412.330,5538838107.570,0.133


By region

In [456]:
regions_cumulative_savings_df = district_cumulative_savings_df.groupby(
    ["hydrologic_region"]
)[
    ["total_gallons_current","total_gallons_baseline","supplier_name"]
].agg(
    {"total_gallons_current":"sum","total_gallons_baseline":"sum","supplier_name":"size"}
).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [457]:
regions_cumulative_savings_df["cumulative_pct_change"] = regions_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [458]:
regions_cumulative_savings_df

Unnamed: 0,hydrologic_region,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,Central Coast,37252334773.58,38397097468.589,25,-0.03
1,Colorado River,49285274361.661,48672475614.415,9,0.013
2,North Coast,11299322827.318,13107817474.506,15,-0.138
3,North Lahontan,3356302137.0,3579635596.0,4,-0.062
4,Sacramento River,149466369876.494,156500560874.047,41,-0.045
5,San Francisco Bay,196853066534.531,215987302424.975,42,-0.089
6,San Joaquin River,76789875686.876,78671372670.795,22,-0.024
7,South Coast,751253092705.079,762342437917.186,153,-0.015
8,South Lahontan,34199157481.151,34504752311.42,13,-0.009
9,Tulare Lake,114404152725.043,117113914983.965,28,-0.023


Statewide

In [459]:
district_cumulative_savings_df["state"] = "California"

In [460]:
statewide_cumulative_savings_df = district_cumulative_savings_df.groupby(
    ["state"]
)[
    ["total_gallons_current","total_gallons_baseline", "supplier_name"]
].agg({
    "total_gallons_current": "sum",
    "total_gallons_baseline": "sum",
    "supplier_name": "size"
}).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [461]:
#statewide_cumulative_savings_df["total_reports"] = statewide_cumulative_savings_df["total_reports"]

In [462]:
statewide_cumulative_savings_df["cumulative_pct_change"] = statewide_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [463]:
statewide_cumulative_savings_df

Unnamed: 0,state,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,California,1424158949108.733,1468877367335.897,352,-0.03


### Charts

In [464]:
alt.Chart(
    regions_df[regions_df.hydrologic_region=="South Coast"]
).mark_bar().encode(
    x="reporting_month:O",
    y="gallons_pct_change:Q",
    color=alt.condition(
        alt.datum.gallons_pct_change > 0,
        alt.value("#e6ae56"),  # The positive color
        alt.value("#83c6e0")  # The negative color
    ),
    tooltip=["gallons_pct_change"]
).properties(title="Monthly water conservation in the South Coast", width=600)

### Export

Monthly

In [465]:
statewide_df.to_csv("../data/processed/monthly-conservation/statewide-conservation-monthly.csv", index=False)

In [466]:
regions_df.to_csv("../data/processed/monthly-conservation/regional-conservation-monthly.csv", index=False)

In [467]:
merge_df.to_csv("../data/processed/monthly-conservation/district-level-conservation-monthly.csv", index=False)

Cumulative

In [468]:
statewide_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/statewide-conservation-cumulative.csv", index=False)

In [469]:
regions_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/monthly-conservation-cumulative.csv", index=False)

In [470]:
district_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/district-level-conservation-cumulative.csv", index=False)