In [191]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [192]:
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [193]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

### Import

In [194]:
df = pd.read_csv("../data/processed/district-level-residential-use.csv", parse_dates=["reporting_month"])

In [195]:
baselines_df = pd.read_csv("../data/raw/uw-2020-baseline-values.csv")

### Clean

Clean up baselines column names as we did in the previous notebook

In [197]:
baselines_df.columns = baselines_df.columns.str.replace(" ","_").str.replace("-","_").str.replace(r'[#,@,&,(,)]', '', regex=True).str.lower()

In [198]:
baselines_df.supplier_name = baselines_df.supplier_name.str.replace("  ", " ")

In [199]:
baselines_trimmed_df = baselines_df[[
    'supplier_name', 
    'public_water_system_id', 
    'month', 
    #'original_units',
    # 'total_potable_production_original_units',
    # 'potable_commercial_agriculture_original_units',
    'total_potable_production_minus_ag_gallons', 
    # 'staff_notes'
]].copy()

Get rid of some unnecessary columns

In [200]:
trim_df = df[[
    'supplier_name', 
    'public_water_system_id', 
    'reporting_month', 
    'county',
    'hydrologic_region', 
    # 'climate_zone', 
    'total_population_served',
    'calculated_total_potable_water_production_gallons_ag_excluded',
    'calculated_r_gpcd', 
    'pop_weighted_rgpcd', 
    # 'month'
]].copy()

### Merge

Filter df to July 2021 and later

In [201]:
filter_df = trim_df[trim_df.reporting_month >= "2021-06-15"].copy()

Make a month column to merge with baselines

In [202]:
filter_df["month"] = filter_df["reporting_month"].dt.month

In [203]:
merge_df = pd.merge(
    filter_df, 
    baselines_trimmed_df, 
    how="left", 
    on=["supplier_name", "month"]
).rename(
    columns={
        "calculated_total_potable_water_production_gallons_ag_excluded": "total_gallons_current",
        "total_potable_production_minus_ag_gallons": "total_gallons_baseline"
    }
).drop("month", axis=1)

### Calculate percent changes by month

In [204]:
def pct_change(new, old):
    return (new - old) / old

By district

In [205]:
merge_df["gallons_pct_change"] = merge_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

By region

In [206]:
regions_df = merge_df.groupby(
    ["hydrologic_region", "reporting_month"]
)[["total_gallons_current","total_gallons_baseline", "supplier_name"]].agg({
    "total_gallons_current":"sum",
    "total_gallons_baseline":"sum",
    "supplier_name":"size"
}).reset_index()

In [207]:
regions_df["gallons_pct_change"] = regions_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [208]:
regions_df[
    #(regions_df.hydrologic_region == "South Coast")&
    (regions_df.reporting_month=="2022-04-15")
]

Unnamed: 0,hydrologic_region,reporting_month,total_gallons_current,total_gallons_baseline,supplier_name,gallons_pct_change


Statewide

In [209]:
statewide_df = merge_df.groupby(["hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [210]:
statewide_df["gallons_pct_change"] = statewide_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

### Calculate cumulative savings since July 2021

Drop suppliers with missing data

In [211]:
counts = merge_df.groupby("supplier_name")["reporting_month"].count().reset_index()

In [212]:
max_count = max(counts.reporting_month)

In [213]:
suppliers_with_complete_data = list(counts[counts.reporting_month == max_count].supplier_name)

In [214]:
complete_data = merge_df[merge_df.supplier_name.isin(suppliers_with_complete_data)].copy()

By district

In [215]:
district_cumulative_savings_df = complete_data.groupby(["supplier_name","hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [216]:
district_cumulative_savings_df["cumulative_pct_change"] = complete_data.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [217]:
district_cumulative_savings_df

Unnamed: 0,supplier_name,hydrologic_region,total_gallons_current,total_gallons_baseline,cumulative_pct_change
0,Adelanto City of,South Lahontan,1223905830.448,1263786920.057,-0.110
1,Alameda County Water District,San Francisco Bay,11331000000.000,12150200000.000,-0.031
2,Alco Water Service,Central Coast,1083142000.000,1114537000.000,-0.066
3,Alhambra City of,South Coast,2520769577.593,2583428317.598,0.066
4,Amador Water Agency,San Joaquin River,1029370000.000,1061830000.000,0.079
...,...,...,...,...,...
347,Westminster City of,South Coast,2946898688.700,2957069114.882,-0.122
348,"Windsor, Town of",North Coast,798709352.799,1029971067.399,-0.011
349,Woodland City of,Sacramento River,2672098974.000,2836923315.000,-0.020
350,Yorba Linda Water District,South Coast,5329889412.330,5538838107.570,0.133


By region

In [218]:
regions_cumulative_savings_df = district_cumulative_savings_df.groupby(
    ["hydrologic_region"]
)[
    ["total_gallons_current","total_gallons_baseline","supplier_name"]
].agg(
    {"total_gallons_current":"sum","total_gallons_baseline":"sum","supplier_name":"size"}
).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [219]:
regions_cumulative_savings_df["cumulative_pct_change"] = regions_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [220]:
regions_cumulative_savings_df

Unnamed: 0,hydrologic_region,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,Central Coast,37252334773.58,38397097468.589,25,-0.03
1,Colorado River,49285274361.661,48672475614.415,9,0.013
2,North Coast,11299322827.318,13107817474.506,15,-0.138
3,North Lahontan,3356302137.0,3579635596.0,4,-0.062
4,Sacramento River,149466369876.494,156500560874.047,41,-0.045
5,San Francisco Bay,196853066534.531,215987302424.975,42,-0.089
6,San Joaquin River,76789875686.876,78671372670.795,22,-0.024
7,South Coast,751253092705.079,762342437917.186,153,-0.015
8,South Lahontan,34199157481.151,34504752311.42,13,-0.009
9,Tulare Lake,114404152725.043,117113914983.965,28,-0.023


Statewide

In [221]:
district_cumulative_savings_df["state"] = "California"

In [222]:
statewide_cumulative_savings_df = district_cumulative_savings_df.groupby(
    ["state"]
)[
    ["total_gallons_current","total_gallons_baseline", "supplier_name"]
].agg({
    "total_gallons_current": "sum",
    "total_gallons_baseline": "sum",
    "supplier_name": "size"
}).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [223]:
#statewide_cumulative_savings_df["total_reports"] = statewide_cumulative_savings_df["total_reports"]

In [224]:
statewide_cumulative_savings_df["cumulative_pct_change"] = statewide_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [225]:
statewide_cumulative_savings_df

Unnamed: 0,state,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,California,1424158949108.733,1468877367335.897,352,-0.03


### Charts

In [226]:
alt.Chart(
    regions_df[regions_df.hydrologic_region=="South Coast"]
).mark_bar().encode(
    x="reporting_month:O",
    y="gallons_pct_change:Q",
    color=alt.condition(
        alt.datum.gallons_pct_change > 0,
        alt.value("#e6ae56"),  # The positive color
        alt.value("#83c6e0")  # The negative color
    ),
    tooltip=["gallons_pct_change"]
).properties(title="Monthly water conservation in the South Coast", width=600)

### Export

Monthly

In [227]:
statewide_df.to_csv("../data/processed/monthly-conservation/statewide-conservation-monthly.csv", index=False)

In [228]:
regions_df.to_csv("../data/processed/monthly-conservation/regional-conservation-monthly.csv", index=False)

In [229]:
merge_df.to_csv("../data/processed/monthly-conservation/district-level-conservation-monthly.csv", index=False)

Cumulative

In [230]:
statewide_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/statewide-conservation-cumulative.csv", index=False)

In [231]:
regions_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/monthly-conservation-cumulative.csv", index=False)

In [232]:
district_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/district-level-conservation-cumulative.csv", index=False)