In [5]:
import pandas as pd
import numpy as np
import altair as alt
import altair_latimes as lat

In [2]:
# import district data downloaded from portal
# NOTE(review): no load statement is visible in this cell, yet later cells use `df` — confirm where `df` is read so the notebook survives Restart & Run All.

In [3]:
# import regional data scraped from dashboard 

In [6]:
# Load the regional table from the local CSV. Later cells read its `year`,
# `month`, `hydrologic_region` and `pop_weighted_rgpcd_region` columns.
regional_usage = pd.read_csv(filepath_or_buffer="monthly-rgpcd-by-region.csv")

---
### Clean and analyze

Create clean residential-use share columns by dividing the preliminary and final percentages by 100

In [65]:
# For each reporting stage, derive `<stage>_residential_use_pct` by dividing
# the reported `<stage>_percent_residential_use` value by 100.
for stage in ("preliminary", "final"):
    df[f"{stage}_residential_use_pct"] = df[f"{stage}_percent_residential_use"].div(100)

Now create a new column with the final percentage

In [66]:
# Start the working share column from the final-stage values.
df["residential_use_pct"] = df.loc[:, "final_residential_use_pct"]

And fill any nulls with the preliminary percentage

In [67]:
# Wherever the working share is null, substitute the preliminary-stage value
# from the same row (fillna aligns the two Series on the index).
df["residential_use_pct"] = df["residential_use_pct"].fillna(
    value=df["preliminary_residential_use_pct"]
)

Calculate residential use per district

In [68]:
# Residential gallons = residential share x total potable production
# (the production column's name indicates agricultural use is excluded).
df["residential_water_use_gallons"] = df["residential_use_pct"].mul(
    df["calculated_total_potable_water_production_gallons_ag_excluded"]
)

Group by hydrologic region and reporting month, summing population served and residential water use

In [69]:
# One row per (hydrologic_region, reporting_month), holding the summed
# population served and summed residential gallons of the rows in that group.
regions = (
    df.groupby(["hydrologic_region", "reporting_month"])[
        ["total_population_served", "residential_water_use_gallons"]
    ]
    .sum()
    .reset_index()
)

In [None]:
# Regional residential gallons per capita per day:
#   summed residential gallons / summed population served / days in the month.
# The day count is taken from each row's reporting month instead of a fixed
# 31, so 28-, 29- and 30-day months are no longer understated.
# NOTE(review): assumes each row's gallons cover the full calendar month of
# `reporting_month` — confirm against the source data.
days_in_reporting_month = pd.to_datetime(regions["reporting_month"]).dt.days_in_month

regions["residential_gpcd_region"] = (
    regions["residential_water_use_gallons"]
    / regions["total_population_served"]
    / days_in_reporting_month
)

In [None]:
# Display only the rows belonging to the most recent reporting month.
regions.loc[regions["reporting_month"] == regions["reporting_month"].max()]

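Merge regional values back onto the district-level dataframe (the merge below uses the population-weighted regional figures loaded from `monthly-rgpcd-by-region.csv`)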

In [None]:
# Columns carried forward from the district-level frame.
# Currently left out: county_under_drought_declaration,
# water_shortage_contingency_stage_invoked, water_shortage_level_indicator.
keeps = [
    "supplier_name",
    "reporting_month",
    "county",
    "hydrologic_region",
    "total_population_served",
    "residential_use_pct",
    "residential_water_use_gallons",
    "calculated_r_gpcd",
]

In [None]:
# Narrow the district frame to just the columns listed in `keeps`.
trim_df = df.loc[:, keeps]

In [77]:
# Build a datetime `reporting_month` from the separate year and month columns,
# pinned to the 15th of the month.
# NOTE(review): presumably the 15th matches the district data's
# `reporting_month` values so the later merge keys line up — confirm.
year_text = regional_usage["year"].astype(str)
month_text = regional_usage["month"].astype(str)
regional_usage["reporting_month"] = pd.to_datetime(year_text + '-' + month_text + '-15')

In [78]:
# Left-join the regional figure onto every district row, matching on region
# and month; district rows without a regional match keep a null value.
merge_df = trim_df.merge(
    regional_usage[["hydrologic_region", "reporting_month", "pop_weighted_rgpcd_region"]],
    how="left",
    on=["hydrologic_region", "reporting_month"],
)

In [79]:
# Long-format view: one row per supplier/region/month and measure, where the
# measure is either the district figure or the regional figure.
melt = merge_df.melt(
    id_vars=["supplier_name", "hydrologic_region", "reporting_month"],
    value_vars=["calculated_r_gpcd", "pop_weighted_rgpcd_region"],
)

In [139]:
# Restrict the chart data to one supplier and to months after 2021-04-01.
is_ladwp = merge_df["supplier_name"] == "Los Angeles Department of Water and Power"
after_cutoff = merge_df["reporting_month"] > "2021-04-01"

# Shared x encoding: reporting month as an ordinal year-month axis.
base = alt.Chart(merge_df[is_ladwp & after_cutoff]).encode(
    x=alt.X("yearmonth(reporting_month):O"),
)

# Bars: the supplier's own calculated R-GPCD for each month.
bar = base.mark_bar(color="#83c6e0").encode(
    y=alt.Y("calculated_r_gpcd", stack=None),
)

# Stepped line: the population-weighted regional R-GPCD for the same months.
avg_line = base.mark_line(interpolate="step", color="#1281aa").encode(
    y=alt.Y("pop_weighted_rgpcd_region"),
)

# Dashed horizontal rule at y = 80.
# NOTE(review): presumably an 80 GPCD target — confirm and cite the source.
goal_line = (
    alt.Chart(pd.DataFrame({"y": [80]}))
    .mark_rule(color="#939598", strokeDash=[3, 5])
    .encode(y="y")
)

# Layer the three marks; the final expression renders the chart.
(bar + avg_line + goal_line).properties(
    title="LADWP residential water usage compared to regional average",
    width=600,
)