In [1]:
import pytz
from datetime import datetime, date, timedelta
import pandas as pd
import altair as alt
import altair_latimes as lat

In [2]:
# Register the theme shipped by altair_latimes under a name, then activate it
# so every chart rendered below picks it up.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

In [3]:
# Show floats with exactly three decimal places in DataFrame output.
pd.set_option("display.float_format", "{:.3f}".format)

### Import

In [5]:
# Load the Colorado River reservoir time series; the "date" column is parsed
# into datetimes while reading.
timeseries_path = "../../data/raw/reservoirs/colorado-river-reservoir-bor-timeseries.csv"
df = pd.read_csv(timeseries_path, parse_dates=["date"])

In [36]:
# Load the per-reservoir historical monthly storage averages.
historical_averages_path = "../../data/metadata/reservoirs-historical-averages.csv"
hist_df = pd.read_csv(historical_averages_path)

In [7]:
# Load the reservoir metadata table (id, lake name, capacity, coordinates are
# the columns used further down).
details_path = "../../data/metadata/reservoirs-metadata-details.csv"
details_df = pd.read_csv(details_path)

In [37]:
# Preview the historical averages table as loaded (month is still a name here).
hist_df

Unnamed: 0,month,average_storage,reservoir_id,average_storage_value,average_storage_unit
0,January,"165,874 af",ATN,165874,af
1,February,"180,481 af",ATN,180481,af
2,March,"194,157 af",ATN,194157,af
3,April,"197,293 af",ATN,197293,af
4,May,"193,537 af",ATN,193537,af
...,...,...,...,...,...
1927,August,"212,098 af",KLM,212098,af
1928,September,"171,318 af",KLM,171318,af
1929,October,"177,005 af",KLM,177005,af
1930,November,"212,214 af",KLM,212214,af


### Clean

In [38]:
# Normalise the header: every column label in lower case.
df.columns = [label.lower() for label in df.columns]

In [39]:
# Normalise the header: spaces in column labels become underscores.
df.columns = [label.replace(" ", "_") for label in df.columns]

In [40]:
# Source column name -> name used by the rest of this notebook.
column_renames = {
    "station_id": "reservoir_id",
    "date_time": "date",
    "storage": "storage_af",
}
df = df.rename(columns=column_renames)

### Filter to last 365 days from today

In [41]:
# Current local calendar date; it anchors the 365-day window below.
today = date.today()
today

datetime.date(2023, 5, 12)

In [42]:
# Start of the reporting window: exactly 365 days before `today`.
last_year = today - timedelta(days=365)
last_year

datetime.date(2022, 5, 12)

Trim to the last 365 days (rows with missing storage values are removed later, in the "Drop missing values" step)

In [43]:
# Keep only readings dated on or after the window start; copy so that adding
# columns later does not touch `df`.
window_start = pd.Timestamp(last_year)
trim_df = df.loc[df["date"] >= window_start].copy()

### Merge historical average

Create month column for merging

In [44]:
# Calendar month number (1-12) of each reading, used as a merge key.
trim_df["month"] = trim_df["date"].dt.month

In [45]:
# English month name -> calendar month number (1-12), built from the names in
# calendar order.
month_names = {
    name: number
    for number, name in enumerate(
        [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ],
        start=1,
    )
}

In [46]:
# Convert month names ("January", ...) to month numbers so this column can be
# merged against trim_df["month"].
# The guard makes the cell safe to re-run: mapping an already-numeric column
# through `month_names` would find no matching keys and overwrite every value
# with NaN, which previously forced a reload of hist_df.
if not pd.api.types.is_numeric_dtype(hist_df["month"]):
    hist_df["month"] = hist_df["month"].map(month_names)

In [47]:
# Attach the historical monthly average to every daily reading, matching on
# reservoir and month. A left join keeps readings that have no average.
reading_columns = ["reservoir_id", "date", "month", "storage_af"]
average_columns = ["reservoir_id", "month", "average_storage_value", "average_storage_unit"]
merge_historical = trim_df[reading_columns].merge(
    hist_df[average_columns],
    on=["reservoir_id", "month"],
    how="left",
)

In [48]:
# Add lake name, capacity and coordinates from the metadata table, then drop
# the duplicate key column and the unit column that is not needed downstream.
reservoir_details = details_df[["id", "lake", "capacity", "lat", "lon"]]
merge_details = merge_historical.merge(
    reservoir_details,
    how="left",
    left_on="reservoir_id",
    right_on="id",
).drop(columns=["id", "average_storage_unit"])

### Calculate percentages

In [49]:
# Current storage as a fraction of total capacity (1.0 == full).
merge_details["current_level_pct_of_total"] = merge_details["storage_af"].div(
    merge_details["capacity"]
)

In [50]:
# Historical average storage for the month as a fraction of total capacity.
merge_details["average_level_pct_of_total"] = merge_details["average_storage_value"].div(
    merge_details["capacity"]
)

In [51]:
# Current storage as a fraction of the historical average for the month
# (1.0 == exactly average).
merge_details["current_level_pct_of_avg"] = merge_details["storage_af"].div(
    merge_details["average_storage_value"]
)

In [52]:
# Inspect the merged frame, including the three ratio columns computed above.
merge_details

Unnamed: 0,reservoir_id,date,month,storage_af,average_storage_value,lake,capacity,lat,lon,current_level_pct_of_total,average_level_pct_of_total,current_level_pct_of_avg
0,MHV,2022-05-12,5,1703017.500,1715533,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.941,0.948,0.993
1,MHV,2022-05-13,5,1706730.000,1715533,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.943,0.948,0.995
2,MHV,2022-05-14,5,1712230.000,1715533,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.946,0.948,0.998
3,MHV,2022-05-15,5,1711295.000,1715533,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.945,0.948,0.998
4,MHV,2022-05-16,5,1719407.500,1715533,Lake Mohave,1810000.000,35.197000°,-114.567000°,0.950,0.948,1.002
...,...,...,...,...,...,...,...,...,...,...,...,...
1455,HVS,2023-05-07,5,584772.400,593833,Lake Havasu,648000.000,34.317000°,-114.156000°,0.902,0.916,0.985
1456,HVS,2023-05-08,5,589118.000,593833,Lake Havasu,648000.000,34.317000°,-114.156000°,0.909,0.916,0.992
1457,HVS,2023-05-09,5,589350.800,593833,Lake Havasu,648000.000,34.317000°,-114.156000°,0.909,0.916,0.992
1458,HVS,2023-05-10,5,587895.800,593833,Lake Havasu,648000.000,34.317000°,-114.156000°,0.907,0.916,0.990


### Drop missing values

In [53]:
# Keep only rows that actually carry a storage reading.
has_storage = merge_details["storage_af"].notna()
drop_na_df = merge_details[has_storage]

### Chart

In [54]:
# Reshape to long form for charting: one row per (date, lake, series), with the
# series name in "variable" and its ratio in "value".
melt = drop_na_df.melt(
    id_vars=["date", "lake"],
    value_vars=["current_level_pct_of_total", "average_level_pct_of_total"],
)

In [55]:
# Lift Altair's row limit so the full long-form frame can be embedded.
alt.data_transformers.disable_max_rows()

# One small panel per lake, comparing the current level with the historical
# average (both as a share of capacity) over time.
storage_lines = (
    alt.Chart(melt)
    .mark_line()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('value'),
        color=alt.Color('variable'),
        column=alt.Column('lake'),
    )
    .properties(width=180, height=180)
)
storage_lines

### Export

Trim and rename for export

In [57]:
# Columns to publish, in output order.
export_columns = [
    'reservoir_id',
    'lake',
    'date',
    'storage_af',
    'average_storage_value',
    'capacity',
    'current_level_pct_of_total',
    'average_level_pct_of_total',
    'current_level_pct_of_avg',
    'lat',
    'lon',
]

# Internal column name -> name used in the published files.
export_renames = {
    'lake': 'reservoir_name',
    'average_storage_value': 'historical_average',
    'capacity': 'total_capacity',
}

# Largest reservoirs first.
export_df = (
    drop_na_df[export_columns]
    .rename(columns=export_renames)
    .sort_values(by="total_capacity", ascending=False)
)

In [59]:
# Write the full 365-day time series, without the DataFrame index.
timeseries_export_path = "../../data/processed/reservoirs/colorado-river-reservoirs-timeseries.csv"
export_df.to_csv(timeseries_export_path, index=False)

In [None]:
# Write the most recent reading of every reservoir.
# The latest date is computed per reservoir rather than once for the whole
# frame: reservoirs do not necessarily share the same last valid reading (rows
# without storage were dropped earlier), and comparing against a single global
# maximum would silently leave any lagging reservoir out of the "latest" file.
# When all reservoirs share the same last date the output is unchanged, and the
# capacity-descending row order of export_df is preserved.
latest_date_by_reservoir = export_df.groupby("reservoir_id")["date"].transform("max")
export_df[export_df["date"] == latest_date_by_reservoir].to_csv(
    "../../data/processed/reservoirs/colorado-latest.csv", index=False
)