In [82]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [83]:
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [145]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

### Import

In [146]:
df = pd.read_csv("../data/processed/district-level-residential-use.csv", parse_dates=["reporting_month"])

In [147]:
baselines_df = pd.read_csv("../data/raw/uw-2020-baseline-values.csv")

### Clean

Clean up baselines column names as we did in the previous notebook

In [148]:
baselines_df.columns = baselines_df.columns.str.replace(" ","_").str.replace("-","_").str.replace(r'[#,@,&,(,)]', '', regex=True).str.lower()

In [149]:
baselines_df.supplier_name = baselines_df.supplier_name.str.replace("  ", " ")

In [150]:
baselines_trimmed_df = baselines_df[[
    'supplier_name', 
    'public_water_system_id', 
    'month', 
    #'original_units',
    # 'total_potable_production_original_units',
    # 'potable_commercial_agriculture_original_units',
    'total_potable_production_minus_ag_gallons', 
    # 'staff_notes'
]].copy()

In [151]:
trim_df = df[[
    'supplier_name', 
    'public_water_system_id', 
    'reporting_month', 
    'county',
    'hydrologic_region', 
    # 'climate_zone', 
    'total_population_served',
    'calculated_total_potable_water_production_gallons_ag_excluded',
    # 'calculated_r_gpcd', 
    # 'pop_weighted_rgpcd', 
    # 'month'
]].copy()

### Merge

Filter df to July 2021 and later

In [152]:
filter_df = trim_df[trim_df.reporting_month >= "2021-07-15"].copy()

Make a month column to merge with baselines

In [153]:
filter_df["month"] = filter_df["reporting_month"].dt.month

In [154]:
merge_df = pd.merge(
    filter_df, 
    baselines_trimmed_df, 
    how="left", 
    on=["supplier_name", "public_water_system_id", "month"]
).rename(
    columns={
        "calculated_total_potable_water_production_gallons_ag_excluded": "total_gallons_current",
        "total_potable_production_minus_ag_gallons": "total_gallons_baseline"
    }
).drop("month", axis=1)

In [156]:
merge_df

Unnamed: 0,supplier_name,public_water_system_id,reporting_month,county,hydrologic_region,total_population_served,total_gallons_current,total_gallons_baseline
0,Adelanto City of,CA3610001,2021-07-15,San Bernardino,South Lahontan,30743,150480384.476,170230680.945
1,Alameda County Water District,CA0110001,2021-07-15,Alameda,San Francisco Bay,356823,1455800000.000,1545900000.000
2,Alco Water Service,CA2710001,2021-07-15,Monterey,Central Coast,30509,131292000.000,135785000.000
3,Alhambra City of,CA1910001,2021-07-15,Los Angeles,South Coast,85168,289886825.130,294292716.295
4,Amador Water Agency,"CA0310002,CA0310019,CA0310012,CA0310021,CA0310003",2021-07-15,Amador,San Joaquin River,22468,148380000.000,149450000.000
...,...,...,...,...,...,...,...,...
3614,Whittier City of,CA1910173,2022-03-15,Los Angeles,South Coast,49954,183802773.570,150217507.847
3615,"Windsor, Town of",CA4910017,2022-03-15,Sonoma,North Coast,28397,67777008.000,77150213.114
3616,Woodland City of,CA5710006,2022-03-15,Yolo,Sacramento River,60978,229077909.000,203351955.000
3617,Yorba Linda Water District,CA3010037,2022-03-15,Orange,South Coast,80540,488998078.680,368889400.080


### Calculate percent changes by month

In [157]:
def pct_change(new, old):
    return (new - old) / old

By district

In [158]:
merge_df["gallons_pct_change"] = merge_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

By region

In [159]:
regions_df = merge_df.groupby(["hydrologic_region", "reporting_month"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [160]:
regions_df["gallons_pct_change"] = regions_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [97]:
test = merge_df.groupby(
    ["hydrologic_region", "reporting_month"]
)[["supplier_name"]].size().reset_index()
test[test.hydrologic_region=="South Coast"]

Unnamed: 0,hydrologic_region,reporting_month,0
63,South Coast,2021-07-15,175
64,South Coast,2021-08-15,176
65,South Coast,2021-09-15,176
66,South Coast,2021-10-15,175
67,South Coast,2021-11-15,175
68,South Coast,2021-12-15,173
69,South Coast,2022-01-15,168
70,South Coast,2022-02-15,167
71,South Coast,2022-03-15,164


Statewide

In [98]:
statewide_df = merge_df.groupby(["hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [99]:
statewide_df["gallons_pct_change"] = statewide_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

### Calculate cumulative savings since July 2021

Drop suppliers with missing data

In [164]:
counts = merge_df.groupby("supplier_name")["reporting_month"].count().reset_index()

In [165]:
max_count = max(counts.reporting_month)

In [166]:
suppliers_with_complete_data = list(counts[counts.reporting_month == max_count].supplier_name)

In [167]:
complete_data = merge_df[merge_df.supplier_name.isin(suppliers_with_complete_data)].copy()

In [178]:
len(merge_df)

3619

In [177]:
len(complete_data)

3195

By district

In [168]:
district_cumulative_savings_df = complete_data.groupby(["supplier_name","hydrologic_region"])[["total_gallons_current","total_gallons_baseline"]].sum().reset_index()

In [169]:
district_cumulative_savings_df["cumulative_pct_change"] = complete_data.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [170]:
district_cumulative_savings_df

Unnamed: 0,supplier_name,hydrologic_region,total_gallons_current,total_gallons_baseline,cumulative_pct_change
0,Adelanto City of,South Lahontan,1088699171.708,1111940389.640,-0.116
1,Alameda County Water District,San Francisco Bay,9937000000.000,10712200000.000,-0.058
2,Alco Water Service,Central Coast,959821000.000,982522000.000,-0.033
3,Alhambra City of,South Coast,2234203174.153,2314721455.351,-0.015
4,Amador Water Agency,San Joaquin River,880510000.000,923830000.000,-0.007
...,...,...,...,...,...
350,Westminster City of,South Coast,2625218581.500,2640211187.267,-0.044
351,"Windsor, Town of",North Coast,684948251.679,900886002.147,0.003
352,Woodland City of,Sacramento River,2309280939.000,2473074091.000,0.009
353,Yorba Linda Water District,South Coast,4708618637.220,4928294347.380,-0.015


By region

In [187]:
regions_cumulative_savings_df = district_cumulative_savings_df.groupby(
    ["hydrologic_region"]
)[
    ["total_gallons_current","total_gallons_baseline","supplier_name"]
].agg(
    {"total_gallons_current":"sum","total_gallons_baseline":"sum","supplier_name":"size"}
).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [188]:
regions_cumulative_savings_df["cumulative_pct_change"] = regions_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [189]:
regions_cumulative_savings_df

Unnamed: 0,hydrologic_region,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,Central Coast,32716146255.22,33921509720.741,25,-0.036
1,Colorado River,43090468285.583,42623037214.57,9,0.011
2,North Coast,9937324703.702,11661837276.164,15,-0.148
3,North Lahontan,2847018849.0,3107029236.0,4,-0.084
4,Sacramento River,127382599515.306,135689555285.983,41,-0.061
5,San Francisco Bay,172520482717.071,190363704001.582,42,-0.094
6,San Joaquin River,66289272428.412,68350510847.275,22,-0.03
7,South Coast,670950012339.83,677788954393.601,156,-0.01
8,South Lahontan,29647408404.087,30298866287.834,13,-0.022
9,Tulare Lake,98771652596.003,101693050661.585,28,-0.029


Statewide

In [111]:
complete_data["state"] = "California"

In [112]:
statewide_cumulative_savings_df = complete_data.groupby(
    ["state"]
)[
    ["total_gallons_current","total_gallons_baseline", "supplier_name"]
].agg({
    "total_gallons_current": "sum",
    "total_gallons_baseline": "sum",
    "supplier_name": "size"
}).reset_index().rename(
    columns={"supplier_name": "total_reports"}
)

In [113]:
statewide_cumulative_savings_df["total_reports"] = statewide_cumulative_savings_df["total_reports"]/9

In [114]:
statewide_cumulative_savings_df["cumulative_pct_change"] = statewide_cumulative_savings_df.apply(lambda x: pct_change(x.total_gallons_current, x.total_gallons_baseline), axis=1)

In [115]:
statewide_cumulative_savings_df

Unnamed: 0,state,total_gallons_current,total_gallons_baseline,total_reports,cumulative_pct_change
0,California,1254152000000.0,1295498000000.0,355.0,-0.031915


### Charts

In [116]:
alt.Chart(
    regions_df[regions_df.hydrologic_region=="South Coast"]
).mark_bar().encode(
    x="reporting_month:O",
    y="gallons_pct_change:Q",
    color=alt.condition(
        alt.datum.gallons_pct_change > 0,
        alt.value("#e6ae56"),  # The positive color
        alt.value("#83c6e0")  # The negative color
    ),
    tooltip=["gallons_pct_change"]
).properties(title="Monthly water conservation in the South Coast", width=600)

### Export

Monthly

In [193]:
statewide_df.to_csv("../data/processed/monthly-conservation/statewide-conservation-monthly.csv", index=False)

In [194]:
regions_df.to_csv("../data/processed/monthly-conservation/regional-conservation-monthly.csv", index=False)

In [196]:
merge_df.to_csv("../data/processed/monthly-conservation/district-level-conservation-monthly.csv", index=False)

Cumulative

In [197]:
statewide_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/statewide-conservation-cumulative.csv", index=False)

In [198]:
regions_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/monthly-conservation-cumulative.csv", index=False)

In [199]:
district_cumulative_savings_df.to_csv("../data/processed/cumulative-conservation/district-level-conservation-cumulative.csv", index=False)