In [1]:
import glob
import datetime
import pandas as pd
import altair as alt
import altair_latimes as lat

In [2]:
# Register the theme object shipped by altair_latimes under a name, then make
# it the active Altair theme. The enable() call stays last so its return value
# is still shown as the cell output.
latimes_theme = 'latimes'
alt.themes.register(latimes_theme, lat.theme)
alt.themes.enable(latimes_theme)

ThemeRegistry.enable('latimes')

In [3]:
# Show up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50
#pd.set_option('display.max_colwidth', None)

### Get latest file from directory

In [4]:
# Collect the path of every .xlsx workbook in the raw drought-response folder.
raw_xlsx_pattern = "../data/raw/uw_drought_response_actions/*.xlsx"
file_list = glob.glob(raw_xlsx_pattern)

In [5]:
# Build the list of dates encoded in the workbook file names: the text left
# after removing the "uw-drought-response" path prefix and the ".xlsx" suffix
# is parsed as mmddyy (e.g. "070822" -> 2022-07-08).
date_list = []
for f in file_list:
    raw_name = f.replace("../data/raw/uw_drought_response_actions/uw-drought-response","").replace(".xlsx","")
    clean_name = raw_name.replace("_","-")
    print(clean_name)
    try:
        parsed_date = datetime.datetime.strptime(clean_name, '%m%d%y')
    except ValueError:
        # Skip this file entirely. Appending after a failed parse would either
        # re-add the previous file's date or raise NameError on the first file.
        print(f"skip ```{raw_name}``` invalid date")
        continue
    print(parsed_date)
    date_list.append(parsed_date)

070822
2022-07-08 00:00:00


In [6]:
# Pick the most recent report date. Fail with an explicit message when no file
# name yielded a date, instead of the bare "max() arg is an empty sequence".
if not date_list:
    raise ValueError(
        "no dated uw-drought-response workbooks found in "
        "../data/raw/uw_drought_response_actions/"
    )
latest = max(date_list)

In [7]:
# Rebuild the workbook path from the latest date, formatted as mmddyy to match
# the file-name pattern the dates were parsed from.
latest_stamp = latest.strftime('%m%d%y')
latest_xl_file = f"../data/raw/uw_drought_response_actions/uw-drought-response{latest_stamp}.xlsx"

In [8]:
#file = "../data/raw/uw_drought_response_actions/uw-drought-response070822.xlsx"

In [9]:
# List the sheets in the latest workbook. The context manager closes the file
# handle that pd.ExcelFile opens; the bare expression left it open.
with pd.ExcelFile(latest_xl_file) as workbook:
    sheet_names = workbook.sheet_names
sheet_names

['Listed responses', 'Machine readable']

In [10]:
# Load the 'Machine readable' sheet, parsing "Reporting Month" as datetimes.
df = pd.read_excel(
    latest_xl_file,
    sheet_name='Machine readable',
    parse_dates=["Reporting Month"],
)

Import clean names

In [11]:
# Supplier metadata; its "id" and "display_name" columns are joined onto the
# report data further down.
clean_names_path = "../data/metadata/urban-water-suppliers-clean-names.csv"
clean_names = pd.read_csv(clean_names_path)

### Prep data

Take latest month

In [12]:
# Keep only the rows for the most recent reporting month.
# The boolean slice is an independent copy of `df`, and the supplier-name
# cleanup further down only assigns to `df`. Normalize the names on this
# frame directly so the merged and exported data carry the cleaned names.
latest_df = df[df["Reporting Month"] == df["Reporting Month"].max()].copy()
latest_df["Supplier Name"] = (
    latest_df["Supplier Name"]
    .str.replace(r" {2,}", " ", regex=True)  # collapse any run of spaces
    .str.strip()
)

In [13]:
# Collapse every run of two or more spaces in the supplier name to one space.
# A single literal "  " -> " " pass leaves "a   b" as "a  b"; the regex
# handles runs of any length.
df["Supplier Name"] = df["Supplier Name"].str.replace(r" {2,}", " ", regex=True)

In [14]:
# Remove leading and trailing whitespace from the supplier names.
supplier_names = df["Supplier Name"]
df["Supplier Name"] = supplier_names.str.strip()

In [122]:
# Left-join the display names onto the latest-month rows, matching the
# report's "Public Water System ID" to the metadata "id" column.
merge_df = latest_df.merge(
    clean_names[["id", "display_name"]],
    how="left",
    left_on="Public Water System ID",
    right_on="id",
)

In [123]:
# Number of merged rows that did not get a display name from the left join.
int(merge_df["display_name"].isna().sum())

0

### List the main identifying columns

In [124]:
# Identifying columns kept alongside every category-specific table.
# "display_name" is the column added by the clean-names merge; the others are
# selected from the merged report data by these names.
global_columns = [
    "Supplier Name",
    "display_name",
    "Public Water System ID",
    "Reporting Month",
    "Total Population Served",
    "County",
    "Hydrologic Region",
    "Climate Zone",
]

In [125]:
# Columns describing the supplier's drought stage and whether (or why not)
# any response measures were enacted.
drought_stage_columns = [
    # 'Water Shortage Contingency Stage Invoked', # junk
    "DWR State Standard Level corresponding to Stage",
    "Enacted any measures",
    "Not experiencing local shortage",
    "Don't have the resources to enact any of the listed Response Activities",
    "Not Enacted Other",
]

In [126]:
# For each drought-stage column, print its name in bold (ANSI escape codes)
# followed by a numbered list of the distinct values it takes in `df`.
for column in drought_stage_columns:
    print(f'\033[1m{column}\033[0m')
    for position, value in enumerate(df[column].unique(), start=1):
        print(f'    {position}. {value}')

[1mDWR State Standard Level corresponding to Stage[0m
    1. 5.0
    2. nan
    3. 2.0
    4. 3.0
    5. 0.0
    6. 1.0
    7. 4.0
[1mEnacted any measures[0m
    1. Yes
    2. nan
    3. No
[1mNot experiencing local shortage[0m
    1. nan
    2. Y
[1mDon't have the resources to enact any of the listed Response Activities[0m
    1. nan
    2. Y
[1mNot Enacted Other[0m
    1. nan
    2. Y


### Assign columns to "conservation action categories," such as water restrictions and water waste

In [127]:
# Response-activity columns grouped under the conservation-action category each
# belongs to. Groups and their members are listed in the order the flat lookup
# below should iterate.
_columns_by_category = {
    "Demand": [
        # "Enhanced outreach and communication",  # not in data
        "Raising rates",
        "Apply drought surcharges",
        "Reduced allocations (for agencies with budget-based rates)",
        "Residential water audits",
        "CII water audits",
        "Expanded existing rebate program",
        "Rationing",
        "Turf replacement/rebate",
        "Demand Reduction Other",
    ],
    "Supply": [
        "Greywater",
        "On-site treatment and reuse",
        "Desalination",
        "Recycled Water",
        "Remediated Groundwater",
        "Supply Augmentation Other",
    ],
    "Water Restrictions": [
        "Weekly watering restrictions",
        "Excessive irrigation of outdoor landscapes",
        "Washing a motor vehicle with a hose not fitted with a shut-off nozzle",
        "Application of potable water directly to driveways or sidewalks",
        "Use of potable water in decorative water features",
        "The application of water to irrigate turf and ornamental landscapes during and within 48 hours after measurable rainfall",
        "Restrictions Other",
    ],
    "Industry-Specific": [
        "Not serving drinking water other than upon request in eating or drinking establishments",
        "Operators of hotels and motels providing guests with the option of choosing not to have towels and linens laundered daily",
        "Industry Other",
    ],
    "Communication": [
        "E-mails",
        "Paper mail",
        # Distinct from the Water Waste key "Notification via customer app".
        "Notifications via Customer App",
        "Website",
        "Articles/News releases",
        "Youtube",
        "Facebook",
        "Instagram",
        "Other Social Media",
        "Community events",
        "Door hanger",
        "Workshops",
        "Television",
        "Radio",
        "Billboard",
        "Paid Media Advertising",
        "Bus shelter",
        "Communication Other (Fill-in)",
    ],
    "Water Waste": [
        "Notification via customer app",
        "Notification via Phone call",
        "Notification via Letter",
        "Notification via Door hanger",
        "Notification via Other",
        "Fine",
        "Assigned a different rate tier",
        "Penalty Other",
    ],
}

# Flat lookup used by the cells below: column name -> category name.
cats = {
    column: category
    for category, columns in _columns_by_category.items()
    for column in columns
}

In [128]:
# Column names that `cats` files under "Demand".
demand_cols = [column for column, category in cats.items() if category == "Demand"]

In [129]:
# Column names that `cats` files under "Supply".
supply_cols = [column for column, category in cats.items() if category == "Supply"]

In [130]:
# Column names that `cats` files under "Water Restrictions".
restrictions_cols = [column for column, category in cats.items() if category == "Water Restrictions"]

In [131]:
# Column names that `cats` files under "Communication".
# The category label in `cats` is singular; filtering on "Communications"
# matched nothing and left this list empty.
comms_cols = [k for k,v in cats.items() if v == "Communication"]

In [132]:
# Column names that `cats` files under "Water Waste".
waste_cols = [column for column, category in cats.items() if category == "Water Waste"]

### Restrictions

In [147]:
# Restrictions table: identifying columns, the three enactment-status columns,
# then every column categorized as a water restriction.
restrictions_status_cols = [
    'Enacted any measures',
    'Not experiencing local shortage',
    "Don't have the resources to enact any of the listed Response Activities",
]
restrictions_df = merge_df[global_columns + restrictions_status_cols + restrictions_cols]

In [153]:
# Preview the County values of the first five rows.
restrictions_df["County"].head()

0    Alameda,Contra Costa
1                  Orange
2             Los Angeles
3                  Merced
4              Stanislaus
Name: County, dtype: object

In [150]:
# Short snake_case names for the identifying and status columns; columns not
# listed here keep their original names.
restrictions_column_names = {
    "Supplier Name": "supplier_name",
    "Public Water System ID": "pwsid",
    "Reporting Month": "reporting_month",
    "Enacted any measures": "enacted_measures",
    "Not experiencing local shortage": "no_local_shortage",
    "Don't have the resources to enact any of the listed Response Activities": "no_resources",
}
renamed_restrictions = restrictions_df.rename(columns=restrictions_column_names)

In [151]:
# Select, in output order, the columns written to the restrictions file:
# supplier identifiers, enactment-status flags, then the restriction actions.
restrictions_id_cols = [
    'supplier_name',
    'display_name',
    'pwsid',
    'reporting_month',
]
restrictions_flag_cols = [
    'enacted_measures',
    'no_local_shortage',
    'no_resources',
]
restrictions_action_cols = [
    'Weekly watering restrictions',
    'Excessive irrigation of outdoor landscapes',
    'Washing a motor vehicle with a hose not fitted with a shut-off nozzle',
    'Application of potable water directly to driveways or sidewalks',
    'Use of potable water in decorative water features',
    'The application of water to irrigate turf and ornamental landscapes during and within 48 hours after measurable rainfall',
    'Restrictions Other',
]
restrictions_trimmed = renamed_restrictions[
    restrictions_id_cols + restrictions_flag_cols + restrictions_action_cols
]

In [152]:
# Write the trimmed restrictions table to the processed-data folder, without
# the DataFrame index.
restrictions_trimmed.to_csv("../data/processed/restrictions/latest.csv", index=False)

### Water waste

In [137]:
#waste_df = merge_df[global_columns + waste_cols]

In [138]:
#waste_long = pd.melt(waste_df, id_vars=global_columns)

In [139]:
# waste_list = waste_long[
#     (waste_long["display_name"] == supplier)
# ]

In [140]:
#rdf = pd.read_excel(file, sheet_name='Listed responses', parse_dates=["Reporting Month"])

In [141]:
# rdf[ 
#     (rdf["Supplier Name"] == supplier) & 
#     (rdf["Reporting Month"] == rdf["Reporting Month"].max()) 
# ]['Type of water waste']

In [142]:
# latest_rdf = rdf[ 
#     (rdf["Reporting Month"] == rdf["Reporting Month"].max()) 
# ]

In [143]:
# waste_df = rdf[
#        ['Supplier Name', 'Public Water System ID', 'Reporting Month', 'Waste Actions', 'Water waste comments',
#        'Type of water waste',
#        'Number of Water Waste incidents identified or reported',
#        'Number of water waste complaints investigated',
#        'Number of water wasters notified',
#        'Number of water waste incidents resulting in penalties']
# ].rename(columns={
#     'Supplier Name': 'supplier_name', 
#     'Public Water System ID': 'pswid', 
#     'Reporting Month': 'reporting_month',
#     'Waste Actions': 'actions', 
#     'Water waste comments': 'comments', 
#     'Type of water waste': 'type',
#     'Number of Water Waste incidents identified or reported': 'incidents_reported',
#     'Number of water waste complaints investigated': 'complaints_investigated',
#     'Number of water wasters notified': 'wasters_notified',
#     'Number of water waste incidents resulting in penalties': 'penalties'
# })

In [144]:
# waste_grouped = waste_df.groupby('reporting_month')[['incidents_reported','complaints_investigated','wasters_notified', 'penalties']].sum().reset_index()

In [145]:
# alt.Chart(
#     waste_df[waste_df.supplier_name == "Los Angeles Department of Water and Power"]
#     #waste_grouped
# ).mark_bar().encode(
#     x="yearmonthdate(reporting_month):O",
#     y="incidents_reported",
#     tooltip=["reporting_month","incidents_reported"]
# ).properties(
#     width=600
# )