In [5]:
import pytz
from datetime import datetime, date, timedelta
import pandas as pd
import altair as alt
import altair_latimes as lat

In [6]:
# Register the theme object exposed by altair_latimes under the name 'latimes',
# then make it the active Altair theme for any charts rendered below.
# NOTE(review): newer Altair releases deprecate `alt.themes` in favor of
# `alt.theme` — confirm against the Altair version this project pins.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [7]:
# Show floats with exactly three decimal places in rendered DataFrames.
pd.set_option(
    'display.float_format',
    lambda value: '%.3f' % value,
)

### Import

In [8]:
# Load the raw reservoir time series; the "date" column is parsed to datetimes on read.
df = pd.read_csv(
    "../data/raw/bor/colorado-river-reservoir-bor-timeseries.csv", parse_dates=["date"]
)

In [9]:
# Load the historical-average storage table (merged onto the readings by reservoir and month below).
hist_df = pd.read_csv("../data/metadata/reservoirs-historical-averages.csv")

In [10]:
# Load per-reservoir metadata (id, lake name, capacity, coordinates are used below).
details_df = pd.read_csv("../data/metadata/reservoirs-metadata-details.csv")

### Clean

In [11]:
# Normalize header case so later column references can be all-lowercase.
df.columns = df.columns.map(str.lower)

In [12]:
# Turn spaces in header names into underscores (snake_case column names).
df.columns = df.columns.map(lambda name: name.replace(" ", "_"))

In [13]:
# Map source column names onto the names used throughout the rest of the notebook.
# Columns that are not present in the frame are left untouched by rename().
df = df.rename(
    columns=dict(
        station_id="reservoir_id",
        date_time="date",
        storage="storage_af",
    )
)

### Filter to last 365 days from today

In [14]:
# Current local calendar date; anchors the trailing 365-day window.
today = date.today()
today

datetime.date(2024, 7, 2)

In [15]:
# Start of the window: the calendar date exactly 365 days before `today`.
last_year = today - timedelta(days=365)
last_year

datetime.date(2023, 7, 3)

Trim to the last 365 days (rows with missing storage values are removed later, under "Drop missing values")

In [16]:
# Keep only readings dated on or after the window start; copy so later column
# assignments do not touch (or warn about) the original frame.
trim_df = df.loc[df["date"] >= pd.Timestamp(last_year)].copy()

### Merge historical average

Create month column for merging

In [17]:
# Calendar month number (1-12) of each reading; used as a merge key below.
trim_df["month"] = trim_df["date"].dt.month

In [19]:
# Lookup from English month name to calendar month number (January=1 ... December=12).
month_names = {
    name: number
    for number, name in enumerate(
        [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ],
        start=1,
    )
}

In [20]:
# Convert month names to month numbers so hist_df can be merged on "month".
# Guarded so the cell is safe to re-run: once the column is numeric, mapping it
# through the name lookup again would turn every month into NaN and silently
# strip all historical averages from the left merge that follows.
if not pd.api.types.is_numeric_dtype(hist_df["month"]):
    hist_df["month"] = hist_df["month"].map(month_names)

In [21]:
# Attach the matching historical average to every reading, keyed on reservoir
# and month. A left join keeps readings that have no historical average.
merge_historical = trim_df[
    ["reservoir_id", "date", "month", "storage_af"]
].merge(
    hist_df[["reservoir_id", "month", "average_storage_value", "average_storage_unit"]],
    on=["reservoir_id", "month"],
    how="left",
)

In [22]:
# Add lake name, capacity and coordinates from the metadata table, then drop the
# duplicate join key ("id") and the unit column, which is not used downstream.
merge_details = (
    merge_historical
    .merge(
        details_df[["id", "lake", "capacity", "lat", "lon"]],
        left_on="reservoir_id",
        right_on="id",
        how="left",
    )
    .drop(columns=["id", "average_storage_unit"])
)

### Calculate percentages

In [23]:
# Current storage as a fraction of total capacity (a ratio such as 0.939, not 93.9).
merge_details["current_level_pct_of_total"] = merge_details["storage_af"].div(
    merge_details["capacity"]
)

In [24]:
# Historical average storage as a fraction of total capacity.
merge_details["average_level_pct_of_total"] = merge_details["average_storage_value"].div(
    merge_details["capacity"]
)

In [25]:
# Current storage relative to the historical average (above 1 means above average).
merge_details["current_level_pct_of_avg"] = merge_details["storage_af"].div(
    merge_details["average_storage_value"]
)

In [26]:
# Display the merged frame to spot-check the joined metadata and the three ratio columns.
merge_details

Unnamed: 0,reservoir_id,date,month,storage_af,average_storage_value,lake,capacity,lat,lon,current_level_pct_of_total,average_level_pct_of_total,current_level_pct_of_avg
0,MHV,2023-07-03,7,1699277.500,1664313,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.939,0.920,1.021
1,MHV,2023-07-04,7,1703292.500,1664313,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.941,0.920,1.023
2,MHV,2023-07-05,7,1707527.500,1664313,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.943,0.920,1.026
3,MHV,2023-07-06,7,1714787.500,1664313,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.947,0.920,1.030
4,MHV,2023-07-07,7,1716245.000,1664313,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.948,0.920,1.031
...,...,...,...,...,...,...,...,...,...,...,...,...
1455,HVS,2024-06-27,6,601760.000,588950,Lake Havasu,648000.000,34.317000°,-114.156000°,0.929,0.909,1.022
1456,HVS,2024-06-28,6,597285.400,588950,Lake Havasu,648000.000,34.317000°,-114.156000°,0.922,0.909,1.014
1457,HVS,2024-06-29,6,596354.200,588950,Lake Havasu,648000.000,34.317000°,-114.156000°,0.920,0.909,1.013
1458,HVS,2024-06-30,6,594938.000,588950,Lake Havasu,648000.000,34.317000°,-114.156000°,0.918,0.909,1.010


### Drop missing values

In [27]:
# Keep only rows that actually have a storage reading.
drop_na_df = merge_details.loc[merge_details["storage_af"].notna()]

### Chart

In [28]:
# Long format for charting: one row per (date, lake, series), where the series
# is either the current or the historical-average share of capacity.
melt = drop_na_df.melt(
    id_vars=["date", "lake"],
    value_vars=["current_level_pct_of_total", "average_level_pct_of_total"],
)

In [29]:
# alt.data_transformers.disable_max_rows()

# alt.Chart(melt).mark_line().encode(
#     x='date:T',
#     y='value',
#     color='variable',
#     column='lake'
# ).properties(
#     width=180,
#     height=180
# )

### Export

Trim and rename for export

In [30]:
# Columns to publish, in export order.
export_columns = [
    'reservoir_id',
    'lake',
    'date',
    'storage_af',
    'average_storage_value',
    'capacity',
    'current_level_pct_of_total',
    'average_level_pct_of_total',
    'current_level_pct_of_avg',
    'lat',
    'lon',
]

# Reader-facing names for the exported columns.
export_renames = {
    'lake': 'reservoir_name',
    'average_storage_value': 'historical_average',
    'capacity': 'total_capacity',
}

# Largest reservoir first, then chronological within each reservoir.
# Every row of a reservoir shares the same total_capacity, and pandas' default
# single-column sort (quicksort) is not stable, so sorting on capacity alone
# leaves the date order inside each reservoir undefined. The explicit
# tie-breakers keep each reservoir's rows contiguous and in date order.
export_df = (
    drop_na_df[export_columns]
    .rename(columns=export_renames)
    .sort_values(
        ["total_capacity", "reservoir_id", "date"],
        ascending=[False, True, True],
    )
)

In [31]:
# Write the full trailing-year series for every reservoir (row index omitted).
export_df.to_csv(
    "../data/processed/bor/colorado-river-reservoirs-timeseries.csv",
    index=False,
)

In [32]:
# Export the most recent reading for each reservoir.
# Filtering on the single newest date in the whole frame would silently drop
# any reservoir whose latest valid reading is older than that date (for
# example when one station reports a day late or its newest value was missing
# and removed above). When every reservoir shares the same newest date, this
# selects exactly the same rows, in the same order, as a global-max filter.
latest_date_by_reservoir = export_df.groupby("reservoir_id")["date"].transform("max")
latest_df = export_df[export_df["date"] == latest_date_by_reservoir]
latest_df.to_csv("../data/processed/bor/colorado-latest.csv", index=False)