In [53]:
import pandas as pd
import numpy as np
import altair as alt
import altair_latimes as lat

In [54]:
# Load the district-level data downloaded from the portal

In [55]:
# Water use by district (time series); `reporting_month` is parsed to datetime on read.
df = pd.read_csv(
    "data/raw/water-use-by-district-timeseries.csv",
    parse_dates=["reporting_month"],
)

In [56]:
# Load the regional data scraped from the dashboard

In [97]:
# Monthly water use by region, read with pandas' default parsing (no date columns parsed here).
regional_usage = pd.read_csv(
    "data/raw/monthly-water-use-by-region.csv",
)

---
### Clean and analyze

In [100]:
# Columns of the district data to carry forward.
# Currently left out (disabled): county_under_drought_declaration,
# water_shortage_contingency_stage_invoked, water_shortage_level_indicator.
keeps = [
    "supplier_name",
    "reporting_month",
    "county",
    "hydrologic_region",
    "total_population_served",
    "residential_use_pct",
    "residential_water_use_gallons",
    "calculated_r_gpcd",
]

In [101]:
# Narrow the district frame to just the columns named in `keeps` (all rows retained).
trim_df = df.loc[:, keeps]

In [102]:
# Build a datetime `reporting_month` on the regional table from its `year` and
# `month` columns, pinned to the 15th of each month.
# NOTE(review): presumably the 15th mirrors how the district data dates each
# month, so both tables can be joined on this column — confirm.
date_text = (
    regional_usage["year"].astype(str)
    + "-"
    + regional_usage["month"].astype(str)
    + "-15"
)
regional_usage["reporting_month"] = pd.to_datetime(date_text)

In [103]:
# Attach the regional `pop_weighted_rgpcd_region` value to each district row,
# matching on hydrologic region and reporting month. A left join keeps every
# district row, whether or not a regional match exists.
merge_df = trim_df.merge(
    regional_usage[
        ["hydrologic_region", "reporting_month", "pop_weighted_rgpcd_region"]
    ],
    on=["hydrologic_region", "reporting_month"],
    how="left",
)

In [104]:
# Reshape to long form: the two usage columns are unpivoted into pandas'
# default `variable` / `value` pair, keyed by supplier, region and month.
melt = merge_df.melt(
    id_vars=["supplier_name", "hydrologic_region", "reporting_month"],
    value_vars=["calculated_r_gpcd", "pop_weighted_rgpcd_region"],
)

In [105]:
# Chart LADWP's `calculated_r_gpcd` by month (bars) against the regional
# `pop_weighted_rgpcd_region` (stepped line), with a dashed rule at the goal level.

# NOTE(review): the source of the 80 target is not shown in this notebook — confirm.
USAGE_GOAL = 80

# Every layer shares this y-axis title. Without an explicit title the layered
# chart labels the axis with the raw field names joined together
# ("calculated_r_gpcd, pop_weighted_rgpcd_region, y").
Y_TITLE = "Residential use (R-GPCD)"

# Rows for LADWP only, strictly after 2021-04-01.
is_ladwp = merge_df.supplier_name == "Los Angeles Department of Water and Power"
is_recent = merge_df.reporting_month > "2021-04-01"

base = alt.Chart(
    merge_df[is_ladwp & is_recent]
).encode(
    # Explicit title replaces the auto-generated "reporting_month (year-month)".
    x=alt.X("yearmonth(reporting_month):O", title="Reporting month"),
)

bar = base.mark_bar(color="#83c6e0").encode(
    # stack=None turns off stacking, so rows sharing a month are drawn from zero.
    y=alt.Y("calculated_r_gpcd", stack=None, title=Y_TITLE)
)

avg_line = base.mark_line(interpolate='step', color='#1281aa').encode(
    y=alt.Y("pop_weighted_rgpcd_region", title=Y_TITLE)
)

# Horizontal dashed rule drawn from a one-row frame holding the goal value.
goal_line = alt.Chart(pd.DataFrame({'y': [USAGE_GOAL]})).mark_rule(
    color="#b75a36", strokeDash=[10, 11]
).encode(y=alt.Y('y', title=Y_TITLE))

(bar + avg_line + goal_line).properties(title="LADWP residential water usage compared to regional average", width=600)