In [154]:
import glob
import datetime
import pandas as pd
import altair as alt
import altair_latimes as lat

In [155]:
# Register the theme provided by `altair_latimes` under the name 'latimes'
# and make it the active Altair theme for the charts in this notebook.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [156]:
# Let pandas display up to 50 columns before eliding them with "...".
pd.set_option('display.max_columns', 50)
#pd.set_option('display.max_colwidth', None)

### Get latest file from directory

In [187]:
# Collect every .xlsx workbook in the raw drought-response-actions directory.
file_list = glob.glob("../data/raw/uw_drought_response_actions/*.xlsx")

In [188]:
# Parse the date stamp embedded in each workbook's filename
# (uw-drought-response<MMDDYY>.xlsx) and collect the dates that parse.
date_list = []
for f in file_list:
    # Strip the directory/prefix and the extension so only the date stamp remains.
    raw_name = f.replace("../data/raw/uw_drought_response_actions/uw-drought-response","").replace(".xlsx","")
    clean_name = raw_name.replace("_","-")
    print(clean_name)
    try:
        parsed_date = datetime.datetime.strptime(clean_name, '%m%d%y')
    except ValueError:
        # Anything other than a bare MMDDYY stamp is skipped. Without the
        # `continue`, the append below would either raise NameError (first file)
        # or silently re-append the previous file's date.
        print(f"skip ```{raw_name}``` invalid date")
        continue
    print(parsed_date)
    date_list.append(parsed_date)

070822
2022-07-08 00:00:00


In [189]:
# Most recent date among the parsed filenames (max() raises ValueError if date_list is empty).
latest = max(date_list)

In [190]:
# Rebuild the path of the newest workbook by formatting `latest` back into the MMDDYY filename stamp.
latest_xl_file = f"../data/raw/uw_drought_response_actions/uw-drought-response{latest.strftime('%m%d%y')}.xlsx"

In [191]:
# `file` is read by the pd.read_excel calls further down but was only ever
# assigned in a commented-out line, so a fresh kernel hit a NameError.
# Bind it to the newest workbook discovered above instead of a hard-coded path
# (previously "../data/raw/uw_drought_response_actions/uw-drought-response070822.xlsx").
# NOTE(review): the sheet names printed by the next cell do not include the
# 'Machine readable' / 'Listed responses' sheets requested later — confirm
# that this directory holds the workbook those reads expect.
file = latest_xl_file

In [192]:
# List the sheet names available in the newest workbook.
pd.ExcelFile(latest_xl_file).sheet_names

['Jun14-May22 Conservation Data',
 '2020 Baseline Values',
 'Jul21-May22 relative to 2020']

In [6]:
# Load the 'Machine readable' sheet from the workbook at `file`,
# parsing the "Reporting Month" column as datetimes.
df = pd.read_excel(file, sheet_name='Machine readable', parse_dates=["Reporting Month"])

Import clean names

In [7]:
# Lookup table of supplier names; the "id" and "display_name" columns are used in the merge below.
clean_names = pd.read_csv("../data/metadata/urban-water-suppliers-clean-names.csv")

### Prep data

Take latest month

In [8]:
# Keep only the rows belonging to the most recent reporting month.
# NOTE(review): this slice is taken before the "Supplier Name" whitespace
# cleanup in the following cells, so latest_df keeps the uncleaned names.
latest_df = df[df["Reporting Month"] == df["Reporting Month"].max()]

In [9]:
# Replace each double space in supplier names with a single space
# (a single pass: runs of three or more spaces are shortened, not fully collapsed).
df["Supplier Name"] = df["Supplier Name"].str.replace("  ", " ")

In [10]:
# Drop leading/trailing whitespace from supplier names.
df["Supplier Name"] = df["Supplier Name"].str.strip()

In [11]:
# Peek at the first row to check the column layout.
latest_df.head(1)

Unnamed: 0,Supplier Name,Public Water System ID,Reporting Month,Total Population Served,County,Hydrologic Region,Climate Zone,Water Shortage Contingency Stage Invoked,DWR State Standard Level corresponding to Stage,Enacted any measures,Not experiencing local shortage,Don't have the resources to enact any of the listed Response Activities,Not Enacted Other,Raising rates,Apply drought surcharges,Reduced allocations (for agencies with budget-based rates),Residential water audits,CII water audits,Expanded existing rebate program,Rationing,Turf replacement/rebate,Demand Reduction Other,Greywater,On-site treatment and reuse,Desalination,...,Door hanger,Workshops,Television,Radio,Billboard,Paid Media Advertising,Bus shelter,Communication Other (Fill-in),Assigned a different rate tier,Fine,Penalty Other,Notification via Door hanger,Notification via Letter,Notification via Other,Notification via Phone call,Notification via customer app,Number of Water Waste incidents identified or reported,Number of water waste complaints investigated,Number of water wasters notified,Number of water waste incidents resulting in penalties,Watering on wrong day,Over-irrigating and causing runoff,Watering sidewalk,Water use over budget,Water Waste Other
0,East Bay Municipal Utilities District,CA0110005,2022-05-15,1430000,"Alameda,Contra Costa",San Francisco Bay,3,Stage 2,5.0,Yes,,,,Y,Y,,Y,Y,Y,Y,,,,,,...,Y,Y,Y,Y,,,,Y,,,,Y,Y,,Y,,105.0,101.0,1.0,0.0,,,,,Y


In [12]:
# Attach each supplier's display name by matching the water system ID
# against the "id" column of the clean-names lookup. A left join keeps
# every row of latest_df even when no display name is found.
display_name_lookup = clean_names[["id","display_name"]]
merge_df = latest_df.merge(
    display_name_lookup,
    how="left",
    left_on="Public Water System ID",
    right_on="id",
)

In [13]:
# Sanity check: number of merged rows that did not receive a display name.
len(
    merge_df[merge_df["display_name"].isna()]
)

0

### List the main identifying columns

In [14]:
# Identifying columns carried alongside every category-specific slice below.
global_columns = [
    "Supplier Name",
    "display_name",
    "Public Water System ID",
    "Reporting Month",
    "Total Population Served",
    "County",
    "Hydrologic Region",
    "Climate Zone",
]

In [15]:
# Columns describing the supplier's drought stage and whether it enacted measures.
drought_stage_columns = [
    # "Water Shortage Contingency Stage Invoked",  # junk
    "DWR State Standard Level corresponding to Stage",
    "Enacted any measures",
    "Not experiencing local shortage",
    "Don't have the resources to enact any of the listed Response Activities",
    "Not Enacted Other",
]

In [146]:
# Ad-hoc inspection: every reporting row whose supplier name contains "Pismo Beach".
df[ df["Supplier Name"].str.contains("Pismo Beach")]

Unnamed: 0,Supplier Name,Public Water System ID,Reporting Month,Total Population Served,County,Hydrologic Region,Climate Zone,Water Shortage Contingency Stage Invoked,DWR State Standard Level corresponding to Stage,Enacted any measures,Not experiencing local shortage,Don't have the resources to enact any of the listed Response Activities,Not Enacted Other,Raising rates,Apply drought surcharges,Reduced allocations (for agencies with budget-based rates),Residential water audits,CII water audits,Expanded existing rebate program,Rationing,Turf replacement/rebate,Demand Reduction Other,Greywater,On-site treatment and reuse,Desalination,...,Door hanger,Workshops,Television,Radio,Billboard,Paid Media Advertising,Bus shelter,Communication Other (Fill-in),Assigned a different rate tier,Fine,Penalty Other,Notification via Door hanger,Notification via Letter,Notification via Other,Notification via Phone call,Notification via customer app,Number of Water Waste incidents identified or reported,Number of water waste complaints investigated,Number of water wasters notified,Number of water waste incidents resulting in penalties,Watering on wrong day,Over-irrigating and causing runoff,Watering sidewalk,Water use over budget,Water Waste Other
1348,Pismo Beach City of,CA4010008,2022-01-15,8233,San Luis Obispo,Central Coast,5,3rd of 4,,Yes,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,Y,,,,2.0,2.0,2.0,0.0,,Y,Y,,
1349,Pismo Beach City of,CA4010008,2021-12-15,8233,San Luis Obispo,Central Coast,5,2nd of 4,,Yes,,,,,,,,,Y,,Y,,,,,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,
1350,Pismo Beach City of,CA4010008,2021-09-15,8233,San Luis Obispo,Central Coast,5,2nd of 4,,Yes,,,,,,,,,Y,,,,,,,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,
1351,Pismo Beach City of,CA4010008,2021-07-15,8233,San Luis Obispo,Central Coast,5,2nd of 4,,Yes,,,,,,,,,Y,,Y,,,,,...,,,,,,,,,,,,,Y,,,,4.0,4.0,4.0,0.0,Y,Y,Y,,


In [16]:
# For each drought-stage column, print its name in bold (ANSI escape codes)
# followed by a 1-based numbered list of the distinct values it contains.
for column in drought_stage_columns:
    distinct_responses = df[column].unique()
    print('\033[1m' + column + '\033[0m')
    for position, response in enumerate(distinct_responses, start=1):
        print(f'    {position}. {response}')

[1mDWR State Standard Level corresponding to Stage[0m
    1. 5.0
    2. nan
    3. 2.0
    4. 3.0
    5. 0.0
    6. 1.0
    7. 4.0
[1mEnacted any measures[0m
    1. Yes
    2. nan
    3. No
[1mNot experiencing local shortage[0m
    1. nan
    2. Y
[1mDon't have the resources to enact any of the listed Response Activities[0m
    1. nan
    2. Y
[1mNot Enacted Other[0m
    1. nan
    2. Y


### Assign columns to "conservation action categories," such as water restrictions and water waste

In [17]:
# Response-action columns grouped by "conservation action category".
# The groups are flattened below into the column -> category lookup (`cats`)
# that later cells filter on; group and column order is preserved.
_columns_by_category = {
    "Demand": [
        # "Enhanced outreach and communication",  # not in data
        "Raising rates",
        "Apply drought surcharges",
        "Reduced allocations (for agencies with budget-based rates)",
        "Residential water audits",
        "CII water audits",
        "Expanded existing rebate program",
        "Rationing",
        "Turf replacement/rebate",
        "Demand Reduction Other",
    ],
    "Supply": [
        "Greywater",
        "On-site treatment and reuse",
        "Desalination",
        "Recycled Water",
        "Remediated Groundwater",
        "Supply Augmentation Other",
    ],
    "Water Restrictions": [
        "Weekly watering restrictions",
        "Excessive irrigation of outdoor landscapes",
        "Washing a motor vehicle with a hose not fitted with a shut-off nozzle",
        "Application of potable water directly to driveways or sidewalks",
        "Use of potable water in decorative water features",
        "The application of water to irrigate turf and ornamental landscapes during and within 48 hours after measurable rainfall",
        "Restrictions Other",
    ],
    "Industry-Specific": [
        "Not serving drinking water other than upon request in eating or drinking establishments",
        "Operators of hotels and motels providing guests with the option of choosing not to have towels and linens laundered daily",
        "Industry Other",
    ],
    "Communication": [
        "E-mails",
        "Paper mail",
        "Notifications via Customer App",
        "Website",
        "Articles/News releases",
        "Youtube",
        "Facebook",
        "Instagram",
        "Other Social Media",
        "Community events",
        "Door hanger",
        "Workshops",
        "Television",
        "Radio",
        "Billboard",
        "Paid Media Advertising",
        "Bus shelter",
        "Communication Other (Fill-in)",
    ],
    "Water Waste": [
        "Notification via customer app",
        "Notification via Phone call",
        "Notification via Letter",
        "Notification via Door hanger",
        "Notification via Other",
        "Fine",
        "Assigned a different rate tier",
        "Penalty Other",
    ],
}

cats = {
    column: category
    for category, columns in _columns_by_category.items()
    for column in columns
}

In [18]:
# Columns tagged "Demand" in `cats`, in the order they were declared.
demand_cols = [
    column
    for column, category in cats.items()
    if category == "Demand"
]

In [19]:
# Columns tagged "Supply" in `cats`, in the order they were declared.
supply_cols = [
    column
    for column, category in cats.items()
    if category == "Supply"
]

In [20]:
# Columns tagged "Water Restrictions" in `cats`, in the order they were declared.
restrictions_cols = [
    column
    for column, category in cats.items()
    if category == "Water Restrictions"
]

In [21]:
# Columns tagged "Communication" in `cats`, in the order they were declared.
# The label must match the value used in `cats` exactly (singular); the
# previous "Communications" matched nothing and always produced an empty list.
comms_cols = [k for k,v in cats.items() if v == "Communication"]

In [22]:
# Columns tagged "Water Waste" in `cats`, in the order they were declared.
waste_cols = [
    column
    for column, category in cats.items()
    if category == "Water Waste"
]

### Restrictions

In [23]:
# Latest-month rows reduced to the identifying columns plus the water-restriction columns.
restrictions_df = merge_df[global_columns + restrictions_cols]

In [24]:
# Reshape wide -> long: one row per (supplier, restriction) pair, with the
# restriction's column name in "restriction" and its cell value in "is_implemented".
restrictions_long = restrictions_df.melt(
    id_vars=global_columns,
    var_name="restriction",
    value_name="is_implemented",
)

In [25]:
#restrictions_long.groupby("restriction")["is_implemented"].count().reset_index()

In [26]:
# Reader-friendly wording for each restriction column name.
rewrite_restrictions_text = {
    "Weekly watering restrictions":
        "Weekly watering restrictions",
    "Excessive irrigation of outdoor landscapes":
        "No overwatering yards",
    "Washing a motor vehicle with a hose not fitted with a shut-off nozzle":
        "No washing cars without a hose fitted with a shutoff nozzle",
    "Application of potable water directly to driveways or sidewalks":
        "No hosing down sidewalks",
    "Use of potable water in decorative water features":
        "No using potable water in decorative water features",
    "The application of water to irrigate turf and ornamental landscapes during and within 48 hours after measurable rainfall":
        "No watering grass within 48 hours after rainfall",
    "Restrictions Other":
        "Other restrictions",
}

In [27]:
# Add the reader-friendly wording for each restriction (names missing from the mapping become NaN).
restrictions_long["restriction_text"] = restrictions_long["restriction"].map(rewrite_restrictions_text)

In [94]:
# Keep only the restrictions a supplier marked as implemented ("Y"),
# excluding the catch-all "Restrictions Other" column.
restrictions_list = restrictions_long[
    #(restrictions_long["display_name"] == supplier) &
    (restrictions_long.is_implemented == "Y") & 
    # throw out 'other' category
    (restrictions_long.restriction != "Restrictions Other")
]

In [95]:
# Example supplier used to preview the generated text.
supplier = "Los Angeles Department of Water and Power"
# Month name taken from the first implemented-restriction row.
month = pd.to_datetime(restrictions_list.iloc[0]["Reporting Month"]).month_name()
# Build the mask from the frame being filtered. The earlier version built it
# from restrictions_long, which pandas had to reindex onto restrictions_list
# ("Boolean Series key will be reindexed to match DataFrame index" warning).
restrictions_test_text = restrictions_list[
    restrictions_list["display_name"] == supplier
]

  restrictions_test_text = restrictions_list[


In [96]:
# Preview the sentence and bullet list generated for the example supplier.
print(f"As of {month}, the {supplier} has implemented the following steps to address its water shortage:")
for restriction_text in restrictions_test_text.restriction_text:
    print(f"    • {restriction_text}")

As of May, the Los Angeles Department of Water and Power has implemented the following steps to address its water shortage:
    • Weekly watering restrictions
    • No overwatering yards
    • No washing cars without a hose fitted with a shutoff nozzle
    • No hosing down sidewalks
    • No using potable water in decorative water features
    • No watering grass within 48 hours after rainfall


In [140]:
# Spreadsheet identifier headers -> snake_case names, applied to both the
# wide restrictions frame and the long list of implemented restrictions.
id_column_names = {
    "Supplier Name": "supplier_name",
    "Public Water System ID": "pwsid",
    "Reporting Month": "reporting_month",
}

renamed_restrictions = restrictions_df.rename(columns=id_column_names)

restrictions_list = restrictions_list.rename(columns=id_column_names)

In [141]:
# One row per supplier with just the fields needed for the export.
# `.copy()` makes this an independent frame, so adding the "list" column in
# the next cell no longer triggers pandas' SettingWithCopyWarning.
restrictions_trim = renamed_restrictions[["display_name","pwsid","reporting_month"]].copy()

In [142]:
def get_timeseries(pwsid):
    """Return the reader-friendly restriction texts implemented by one supplier.

    Looks up every row of `restrictions_list` whose `pwsid` equals the given
    water system ID and returns their `restriction_text` values as a list
    (empty when the supplier has no implemented restrictions).
    """
    supplier_rows = restrictions_list[restrictions_list["pwsid"] == pwsid]
    return list(supplier_rows["restriction_text"])
# Attach each supplier's list of implemented restrictions as a new "list" column.
restrictions_trim["list"] = restrictions_trim["pwsid"].apply(get_timeseries)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  restrictions_trim["list"] = restrictions_trim.pwsid.apply(get_timeseries)


In [143]:
# Display the per-supplier frame with its new "list" column.
restrictions_trim#.iloc[0]['list']

Unnamed: 0,display_name,pwsid,reporting_month,list
0,East Bay Municipal Utilities District,CA0110005,2022-05-15,"[No overwatering yards, No washing cars withou..."
1,Yorba Linda Water District,CA3010037,2022-05-15,[]
2,City of Long Beach,CA1910065,2022-05-15,"[Weekly watering restrictions, No overwatering..."
3,City of Los Banos,CA2410005,2022-05-15,"[Weekly watering restrictions, No washing cars..."
4,City of Turlock,CA5010019,2022-05-15,"[Weekly watering restrictions, No overwatering..."
...,...,...,...,...
354,Mountain House Community Services District,CA3910027,2022-05-15,"[Weekly watering restrictions, No overwatering..."
355,Cloverdale,CA4910002,2022-05-15,"[Weekly watering restrictions, No overwatering..."
356,Greenfield County Water District,CA1510024,2022-05-15,"[Weekly watering restrictions, No overwatering..."
357,City of Signal Hill,CA1910149,2022-05-15,"[Weekly watering restrictions, No overwatering..."


In [144]:
# Write the per-supplier restrictions to the processed-data directory, without the index.
# NOTE(review): the "list" column holds Python lists, which presumably land in the
# CSV as their string representation — confirm downstream readers expect that.
restrictions_trim.to_csv(
    "../data/processed/restrictions/latest.csv",
    index=False
)

### Water waste

In [32]:
#waste_df = merge_df[global_columns + waste_cols]

In [33]:
#waste_long = pd.melt(waste_df, id_vars=global_columns)

In [34]:
# waste_list = waste_long[
#     (waste_long["display_name"] == supplier)
# ]

In [35]:
# Load the 'Listed responses' sheet from the workbook at `file`,
# parsing the "Reporting Month" column as datetimes.
rdf = pd.read_excel(file, sheet_name='Listed responses', parse_dates=["Reporting Month"])

In [36]:
# 'Type of water waste' reported by the example supplier in the most recent reporting month.
rdf[ 
    (rdf["Supplier Name"] == supplier) & 
    (rdf["Reporting Month"] == rdf["Reporting Month"].max()) 
]['Type of water waste']

123    Watering on wrong day, Over-irrigating and cau...
Name: Type of water waste, dtype: object

In [37]:
# Listed-response rows for the most recent reporting month only.
# NOTE(review): the cells that follow build waste_df from `rdf`, not from this frame.
latest_rdf = rdf[ 
    (rdf["Reporting Month"] == rdf["Reporting Month"].max()) 
]

In [56]:
# Water-waste columns to keep, mapped to short snake_case names. One mapping
# drives both the selection and the rename so the two cannot drift apart.
# The water system ID is named "pwsid" to match the restrictions frames above
# (it was previously misspelled "pswid").
waste_column_names = {
    'Supplier Name': 'supplier_name',
    'Public Water System ID': 'pwsid',
    'Reporting Month': 'reporting_month',
    'Waste Actions': 'actions',
    'Water waste comments': 'comments',
    'Type of water waste': 'type',
    'Number of Water Waste incidents identified or reported': 'incidents_reported',
    'Number of water waste complaints investigated': 'complaints_investigated',
    'Number of water wasters notified': 'wasters_notified',
    'Number of water waste incidents resulting in penalties': 'penalties',
}

# Select the columns in mapping order, then apply the short names.
waste_df = rdf[list(waste_column_names)].rename(columns=waste_column_names)

In [62]:
# Total the four water-waste counts across all suppliers for each reporting month.
waste_grouped = waste_df.groupby('reporting_month')[['incidents_reported','complaints_investigated','wasters_notified', 'penalties']].sum().reset_index()

In [86]:
# Bar chart of reported water-waste incidents per reporting date for one supplier
# (swap in the commented-out waste_grouped to chart the all-supplier totals instead).
alt.Chart(
    waste_df[waste_df.supplier_name == "Los Angeles Department of Water and Power"]
    #waste_grouped
).mark_bar().encode(
    # Treat each reporting date as an ordinal category on the x axis.
    x="yearmonthdate(reporting_month):O",
    y="incidents_reported",
    tooltip=["reporting_month","incidents_reported"]
).properties(
    width=600
)