In [52]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [53]:
# Register the theme object shipped with altair_latimes under a single name,
# then activate that same name; the enable() call stays last so the cell
# still displays its return value.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

### Import

District data, downloaded from the portal

In [54]:
# Load the district-level time series. reporting_month is parsed into
# datetimes at read time because later cells merge and filter on it as a date.
df = pd.read_csv(
    "../data/raw/water-use-by-district-timeseries.csv",
    parse_dates=["reporting_month"],
)

Regional data, scraped from the dashboard

In [55]:
# Load the regional monthly figures as-is. No date parsing is requested here;
# a datetime column is assembled from this table's "year" and "month"
# columns in a later cell.
regional_usage = pd.read_csv(
    "../data/raw/monthly-water-use-by-region.csv",
)

---
### Clean and analyze

In [58]:
# Columns carried forward from the district data into the analysis.
# Two further columns are deliberately left out for now:
#   county_under_drought_declaration, water_shortage_level_indicator
keeps = [
    'supplier_name',
    'reporting_month',
    'county',
    'hydrologic_region',
    'total_population_served',
    'water_shortage_contingency_stage_invoked',
    'calculated_r_gpcd',
]

In [59]:
# Narrow the district data to the columns listed in `keeps`; every row is kept.
trim_df = df.loc[:, keeps]

In [60]:
# Build a datetime column for the regional table from its "year" and "month"
# columns, pinning the day of month to the 15th.
# NOTE(review): presumably the 15th is chosen to line up with the district
# data's reporting_month values so the two tables can be merged - confirm.
year_text = regional_usage["year"].astype(str)
month_text = regional_usage["month"].astype(str)
regional_usage["reporting_month"] = pd.to_datetime(
    year_text + '-' + month_text + '-15'
)

In [61]:
# Attach the regional pop_weighted_rgpcd value to every supplier row that
# shares the same hydrologic_region and reporting_month. The left join keeps
# all supplier rows; rows without a regional match get NaN in
# pop_weighted_rgpcd.
#
# validate="many_to_one" makes pandas raise MergeError if the regional table
# has more than one row for a (hydrologic_region, reporting_month) pair.
# Without the check such duplicates would silently duplicate supplier rows
# and inflate any counts computed from merge_df.
regional_cols = ["hydrologic_region", "reporting_month", "pop_weighted_rgpcd"]
merge_df = pd.merge(
    trim_df,
    regional_usage[regional_cols],
    how="left",
    on=["hydrologic_region", "reporting_month"],
    validate="many_to_one",
)

In [65]:
# For each reporting month, count the non-null supplier_name entries under
# each shortage-stage label. Labels are grouped exactly as they appear in the
# data, so differently spelled labels form separate groups.
(
    merge_df
    .groupby(
        ["reporting_month", "water_shortage_contingency_stage_invoked"],
        as_index=False,
    )["supplier_name"]
    .count()
)

Unnamed: 0,reporting_month,water_shortage_contingency_stage_invoked,supplier_name
0,2014-06-15,Permanent Water Limitations,1
1,2014-06-15,0,7
2,2014-06-15,1,48
3,2014-06-15,1-A,2
4,2014-06-15,1B,1
...,...,...,...
8473,2022-03-15,permanent restrictions,1
8474,2022-03-15,stage 1,3
8475,2022-03-15,stage 2,2
8476,2022-03-15,stage II,1


In [48]:
# Reshape to long form: the two usage columns are stacked so that each row
# holds one measurement, with the source column's name in "variable" and its
# number in "value"; supplier, region and month are repeated as identifiers.
melt = merge_df.melt(
    id_vars=["supplier_name", "hydrologic_region", "reporting_month"],
    value_vars=["calculated_r_gpcd", "pop_weighted_rgpcd"],
)

In [51]:
# Chart: one supplier's monthly calculated_r_gpcd (bars) against the regional
# pop_weighted_rgpcd (stepped line) and a fixed reference level (dashed rule).

# Values a reader may want to tune, pulled out of the chart expressions.
SUPPLIER = "Los Angeles Department of Water and Power"
START_AFTER = "2021-04-01"  # only rows with reporting_month strictly after this are drawn
# NOTE(review): presumably a conservation target expressed in the same units
# as the R-GPCD columns - confirm the figure and cite its source.
GOAL_GPCD = 80

# The three layers share one y-axis but encode it from three different
# fields. Giving every layer the same explicit title yields a single readable
# axis label instead of one assembled from the raw field names.
Y_TITLE = "Residential water use (R-GPCD)"

ladwp_recent = merge_df[
    (merge_df.supplier_name == SUPPLIER) & (merge_df.reporting_month > START_AFTER)
]

base = alt.Chart(ladwp_recent).encode(
    x=alt.X("yearmonth(reporting_month):O", title="Reporting month"),
)

# stack=None is kept from the original so bars are drawn unstacked.
bar = base.mark_bar(color="#83c6e0").encode(
    y=alt.Y("calculated_r_gpcd", stack=None, title=Y_TITLE)
)

avg_line = base.mark_line(interpolate='step', color='#1281aa').encode(
    y=alt.Y("pop_weighted_rgpcd", title=Y_TITLE)
)

# A one-row frame is enough to draw a horizontal rule at the reference level.
goal_line = (
    alt.Chart(pd.DataFrame({'y': [GOAL_GPCD]}))
    .mark_rule(color="#b75a36", strokeDash=[10, 11])
    .encode(y=alt.Y('y', title=Y_TITLE))
)

(bar + avg_line + goal_line).properties(
    title="LADWP residential water usage compared to regional average",
    width=600,
)