In [36]:
import pytz
from datetime import datetime, date, timedelta
import pandas as pd
import altair as alt
import altair_latimes as lat

In [37]:
# Register the altair_latimes theme under the name 'latimes' and make it the active Altair theme.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

In [38]:
# Show floats with exactly three decimal places whenever pandas renders them.
pd.options.display.float_format = lambda value: '%.3f' % value

### Import

In [39]:
# Load the raw statewide timeseries, parsing both timestamp columns as datetimes.
df = pd.read_csv(
    filepath_or_buffer="../data/raw/reservoirs/statewide/timeseries.csv",
    parse_dates=["DATE TIME", "OBS DATE"],
)

In [40]:
# Load the historical-averages metadata file.
hist_df = pd.read_csv(filepath_or_buffer="../data/metadata/reservoirs-statewide-historical-averages.csv")

In [41]:
# Load the reservoir details metadata file.
details_df = pd.read_csv(filepath_or_buffer="../data/metadata/reservoirs-statewide-details.csv")

### Clean

In [42]:
# Inspect column dtypes and non-null counts; in the output below VALUE is read
# as object rather than a numeric dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   STATION_ID     357 non-null    object        
 1   DURATION       357 non-null    object        
 2   SENSOR_NUMBER  357 non-null    int64         
 3   SENSOR_TYPE    357 non-null    object        
 4   DATE TIME      357 non-null    datetime64[ns]
 5   OBS DATE       357 non-null    datetime64[ns]
 6   VALUE          357 non-null    object        
 7   DATA_FLAG      357 non-null    object        
 8   UNITS          357 non-null    object        
dtypes: datetime64[ns](2), int64(1), object(6)
memory usage: 25.2+ KB


In [43]:
# Lower-case every column name.
df.columns = df.columns.map(str.lower)

In [44]:
# Replace spaces in column names with underscores.
df.columns = df.columns.map(lambda name: name.replace(" ", "_"))

In [45]:
# Give the key columns the names used by the rest of the notebook.
df = df.rename(
    {"station_id": "reservoir_id", "date_time": "date", "value": "storage_af"},
    axis="columns",
)

### Check out latest data

In [46]:
# Keep only rows whose storage value is not the "---" placeholder.
valid_entries = df.loc[df["storage_af"].ne("---")]

In [47]:
# Take an independent copy of the rows stamped with the most recent date among the valid entries.
latest_df = valid_entries.loc[
    valid_entries["date"].eq(valid_entries["date"].max())
].copy()

In [48]:
# NOTE(review): commented-out alternative. Rows holding the "---" placeholder are
# instead filtered out when valid_entries is built. Consider deleting this cell.
#df["storage_af"] = df["storage_af"].str.replace("---","")

In [49]:
# Convert the storage column (read as object dtype) to a numeric dtype.
latest_df["storage_af"] = latest_df["storage_af"].pipe(pd.to_numeric)

In [50]:
# NOTE(review): disabled code. Rows with the "---" placeholder are excluded via
# valid_entries instead. Consider deleting this cell.
#df = df.dropna(subset="storage_af")

In [51]:
# NOTE(review): this and the following three commented-out cells appear to be an
# earlier approach that selected yesterday's rows (America/Los_Angeles time).
# latest_df is now taken from the most recent valid date instead. Consider removing them.
# tz = pytz.timezone("America/Los_Angeles")

In [52]:
# today = datetime.now(tz).date()
# today

In [53]:
# yesterday = (today - pd.DateOffset(days=1)).date()
# yesterday

In [54]:
# latest_df = df[df.date == pd.to_datetime(yesterday)].copy()

In [55]:
# Sanity check: number of rows in the latest snapshot.
latest_df.shape[0]

1

In [56]:
# Sanity check: total storage across the latest snapshot rows.
latest_df["storage_af"].sum()

15000000

### Merge historical average

Create month column for merging

In [57]:
# Month number of each observation, used as a join key against the historical averages.
latest_df["month"] = latest_df["date"].dt.month

In [58]:
# Convert full month names (parsed with format='%B') to month numbers so the
# column can be joined against latest_df["month"].
# The dtype guard keeps this cell safe to re-run: once the column already holds
# numbers, parsing it again with format='%B' would raise instead of being a no-op.
if not pd.api.types.is_numeric_dtype(hist_df["month"]):
    hist_df["month"] = pd.to_datetime(hist_df["month"], format='%B').dt.month

In [59]:
# Attach the historical average for the matching reservoir and month.
# Left join: every latest row is kept even when no average is found.
merge_historical = latest_df[["reservoir_id","date","month","storage_af"]].merge(
    hist_df[["reservoir_id", "month", "average_storage"]],
    on=["reservoir_id","month"],
    how="left",
)

In [60]:
# Attach name, capacity and dam count for each reservoir (left join on reservoir_id).
merge_details = merge_historical.merge(
    details_df[["reservoir_id", "lake_name", "capacity", "number_of_dams"]],
    on=["reservoir_id"],
    how="left",
)

In [61]:
# Relabel the "Total" row as "Statewide".
merge_details["lake_name"] = merge_details["lake_name"].replace({"Total": "Statewide"})

### Calculate percentages

In [62]:
# Current storage as a fraction of capacity (a ratio, not multiplied by 100).
merge_details["current_level_pct_of_total"] = merge_details["storage_af"].div(
    merge_details["capacity"]
)

In [63]:
# Historical average storage as a fraction of capacity (a ratio, not multiplied by 100).
merge_details["average_level_pct_of_total"] = merge_details["average_storage"].div(
    merge_details["capacity"]
)

In [64]:
# Current storage relative to the historical average (a ratio, not multiplied by 100).
merge_details["current_level_pct_of_avg"] = merge_details["storage_af"].div(
    merge_details["average_storage"]
)

In [65]:
# Reshape to long form: one row per (reservoir_id, measure) for the three charted measures.
melt = merge_details.melt(
    id_vars="reservoir_id",
    value_vars=["storage_af", "average_storage", "capacity"],
)

In [66]:
# Rank of each measure, used as the chart's `order` channel: current storage,
# then the historical average, then capacity.
bar_order = dict(storage_af=1, average_storage=2, capacity=3)

In [67]:
# Tag each long-form row with its measure's rank from bar_order.
melt = melt.assign(bar_order=melt["variable"].map(bar_order))

In [68]:
# Fixed colour per measure: grey for capacity, light blue for the historical
# average, darker blue for current storage.
domain = ["capacity", "average_storage", "storage_af"]
range_ = ['#ddd', '#83c6e0', '#1281aa']

# Build each encoding channel separately, then assemble the bar chart.
x_channel = alt.X('reservoir_id', axis=alt.Axis(labels=False))
y_channel = alt.Y('value', stack="normalize")
color_channel = alt.Color(
    'variable',
    scale=alt.Scale(domain=domain, range=range_),
    # sort='descending' was left disabled in the original cell
)

(
    alt.Chart(melt)
    .mark_bar()
    .encode(
        x=x_channel,
        y=y_channel,
        order="bar_order",
        color=color_channel,
        tooltip=["reservoir_id"],
    )
    .properties(title="Statewide water storage", width=150)
)

### Export

Trim and rename for export

In [69]:
# Columns to publish, in output order.
export_columns = [
    'reservoir_id',
    'lake_name',
    'number_of_dams',
    'date',
    'storage_af',
    'average_storage',
    'capacity',
    'current_level_pct_of_total',
    'average_level_pct_of_total',
    'current_level_pct_of_avg',
]

# Select those columns and expose lake_name under its export name.
export_df = merge_details.loc[:, export_columns].rename(
    columns={"lake_name": "reservoir_name"}
)

In [70]:
# Write the trimmed snapshot to the processed-data folder without the index column.
# The path has no placeholders, so a plain string literal is used instead of an f-string.
export_df.to_csv("../data/processed/reservoirs/reservoirs-statewide-latest.csv", index=False)