In [1]:
import pytz
from datetime import datetime, date, timedelta
from time import strptime
import pandas as pd
import altair as alt
import altair_latimes as lat

In [2]:
# Register the theme object exposed by altair_latimes under a name, then make
# it the active Altair theme for every chart rendered below.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

In [3]:
# Render floats with three decimal places whenever pandas displays them.
pd.set_option('display.float_format', '{:.3f}'.format)

### Import

In [4]:
# Load the raw statewide reservoir time series; the two timestamp columns are
# parsed into datetimes while reading.
timeseries_path = "../../data/raw/reservoirs/statewide/timeseries.csv"
timestamp_columns = ["DATE TIME", "OBS DATE"]
df = pd.read_csv(timeseries_path, parse_dates=timestamp_columns)

In [5]:
# Load the historical-average metadata, forcing the "month" column to be read
# as text rather than letting pandas infer its dtype.
historical_path = "../../data/metadata/reservoirs-statewide-historical-averages.csv"
hist_df = pd.read_csv(historical_path, dtype={"month": str})

In [6]:
# Load the reservoir details metadata with pandas' default parsing.
details_path = "../../data/metadata/reservoirs-statewide-details.csv"
details_df = pd.read_csv(details_path)

### Clean

In [7]:
# Normalise every column header to lower case.
df = df.rename(columns=str.lower)

In [8]:
# Replace spaces in column headers with underscores so they can be used as
# attribute-style names (e.g. df.storage_af).
df = df.rename(columns=lambda header: header.replace(" ", "_"))

In [9]:
# Map the raw feed's column names onto the names used in the rest of this
# notebook.
column_renames = {
    "station_id": "reservoir_id",
    "date_time": "date",
    "value": "storage_af",
}
df = df.rename(columns=column_renames)

### Filter to last 365 days from today

In [10]:
# Current calendar date according to the machine's local clock.
today = date.today()
today

datetime.date(2024, 3, 12)

In [11]:
# Start of the reporting window: the calendar date exactly 365 days before
# `today` (not "same day last year", so leap years shift it by one day).
last_year = today - timedelta(days=365)
last_year

datetime.date(2023, 3, 13)

Trim to the last 365 days and drop rows whose storage value is the `"---"` placeholder

In [12]:
# Keep only rows dated on or after `last_year` whose storage_af is not the
# literal string "---" (treated as an invalid reading).
#
# Both conditions are evaluated against the same frame and combined into a
# single mask. Chaining df[mask_a][mask_b] instead applies a mask built on the
# full frame to an already-filtered frame, which makes pandas reindex the mask
# and emit "Boolean Series key will be reindexed to match DataFrame index".
is_recent = df["date"] >= pd.to_datetime(last_year)
has_reading = df["storage_af"] != "---"
trim_df = df.loc[is_recent & has_reading].copy()

  trim_df = df[


### Merge historical average

Create month column for merging

In [13]:
# Calendar month number (1-12) of each reading, used as a merge key below.
trim_df["month"] = pd.to_datetime(trim_df["date"]).dt.month

In [14]:
# Replace the text "month" column (read as str on load) with "month_int",
# renamed to "month", so the merge key lines up with the numeric month derived
# from trim_df's dates. Presumably month_int holds the month number — verify
# against the metadata file.
#
# Guarded on "month_int" still being present so that re-running this cell is a
# no-op; without the guard a second run would drop the freshly renamed
# "month" column and break the merge that follows.
if "month_int" in hist_df.columns:
    hist_df = hist_df.drop(columns="month").rename(columns={"month_int": "month"})

In [15]:
# Attach each reading's historical average storage for the same reservoir and
# calendar month. A left join keeps every reading even when no average exists.
join_keys = ["reservoir_id", "month"]
readings = trim_df[["reservoir_id", "date", "month", "storage_af"]]
averages = hist_df[["reservoir_id", "month", "average_storage"]]
merge_historical = readings.merge(averages, how="left", on=join_keys)

Merge reservoir details (lake name, capacity and number of dams)

In [16]:
# Attach the descriptive columns for each reservoir; a left join keeps every
# reading even if a reservoir is missing from the details table.
detail_columns = ["reservoir_id", "lake_name", "capacity", "number_of_dams"]
merge_details = merge_historical.merge(
    details_df[detail_columns],
    how="left",
    on="reservoir_id",
)

In [17]:
# Relabel the rows whose lake_name is "Total" as "Statewide".
is_total_row = merge_details["lake_name"].eq("Total")
merge_details.loc[is_total_row, "lake_name"] = "Statewide"

### Calculate percentages

In [18]:
# Cast storage_af to integers so it can be used in the ratio calculations
# below (the "---" rows were removed when trim_df was built).
merge_details = merge_details.astype({"storage_af": int})

In [19]:
# Current storage as a fraction of capacity (a ratio, not multiplied by 100).
merge_details["current_level_pct_of_total"] = merge_details["storage_af"].div(
    merge_details["capacity"]
)

In [20]:
# Historical average storage as a fraction of capacity (a ratio, not
# multiplied by 100).
merge_details["average_level_pct_of_total"] = merge_details["average_storage"].div(
    merge_details["capacity"]
)

In [21]:
# Current storage relative to the historical average for the same month
# (a ratio, not multiplied by 100).
merge_details["current_level_pct_of_avg"] = merge_details["storage_af"].div(
    merge_details["average_storage"]
)

### Chart

In [22]:
# Reshape to long form: one row per (date, series) so both capacity ratios can
# be drawn as separate coloured lines on one chart.
ratio_columns = ["current_level_pct_of_total", "average_level_pct_of_total"]
melt = merge_details.melt(id_vars="date", value_vars=ratio_columns)

In [23]:
# Peek at the first row of the long-form frame to confirm its shape.
melt.iloc[:1]

Unnamed: 0,date,variable,value
0,2023-03-13,current_level_pct_of_total,0.664


In [24]:
# Line chart of the two capacity ratios over time, one colour per series, with
# the date shown on hover.
encodings = {
    "x": "date",
    "y": "value",
    "color": "variable",
    "tooltip": "date",
}
level_chart = alt.Chart(melt).mark_line().encode(**encodings)
level_chart.properties(width=600)

### Export

Trim and rename for export

In [25]:
# Columns written to the processed CSV files, in output order.
export_columns = [
    'date',
    'storage_af',
    'average_storage',
    'capacity',
    'current_level_pct_of_total',
    'average_level_pct_of_total',
    'current_level_pct_of_avg',
]

# NOTE(review): "lake_name" is not among export_columns, so this rename
# currently has no effect on the exported frame. Confirm whether the reservoir
# name was meant to be part of the export before adding it or removing the
# rename, since either change alters the published CSV schema.
export_df = merge_details[export_columns].rename(
    columns={"lake_name": "reservoir_name"}
)

In [26]:
# Write the full trimmed time series, without the DataFrame index.
timeseries_out = "../../data/processed/reservoirs/statewide-timeseries.csv"
export_df.to_csv(timeseries_out, index=False)

In [27]:
# Write only the row(s) carrying the most recent date, without the index.
latest_out = "../../data/processed/reservoirs/statewide-latest.csv"
is_latest = export_df["date"] == export_df["date"].max()
export_df.loc[is_latest].to_csv(latest_out, index=False)