In [2]:
import pytz
from datetime import datetime, date, timedelta
import pandas as pd
import altair as alt
import altair_latimes as lat

In [3]:
# Register the theme object exposed by altair_latimes under the name 'latimes',
# then enable it as the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [4]:
def _three_decimals(value):
    """Format a float with exactly three digits after the decimal point."""
    return '%.3f' % value


# Show floats with three decimals in rendered DataFrames instead of pandas' default.
pd.set_option('display.float_format', _three_decimals)

### Import

In [5]:
# df = pd.read_csv(
#     "../data/raw/reservoirs/colorado-river-reservoir-scrape-timeseries.csv", 
#     parse_dates=["DATE TIME", "OBS DATE"]
# )

In [6]:
# Load the reservoir time-series CSV, parsing its "date" column as datetimes.
df = pd.read_csv(
    filepath_or_buffer="../data/raw/reservoirs/colorado-river-reservoir-bor-timeseries.csv",
    parse_dates=["date"],
)

In [7]:
# Historical averages table; later merged onto the readings by reservoir_id and month.
hist_df = pd.read_csv("../data/metadata/reservoirs-historical-averages.csv")

In [8]:
# Reservoir metadata; the id, lake, capacity, lat and lon columns are used further down.
details_df = pd.read_csv("../data/metadata/reservoirs-metadata-details.csv")

### Clean

In [9]:
# Normalize column names to lowercase so later cells can use one consistent spelling.
df.columns = df.columns.map(str.lower)

In [10]:
# Replace spaces in column names with underscores.
df.columns = df.columns.map(lambda column_name: column_name.replace(" ", "_"))

In [11]:
# Map source column names onto the names used in the rest of the notebook.
# Keys that are not present in df are ignored by rename.
df = df.rename(
    {
        "station_id": "reservoir_id",
        "date_time": "date",
        #"value": "storage_af"
        "storage": "storage_af",
    },
    axis="columns",
)

In [13]:
#df["storage_af"] = df["storage_af"].str.replace("---","")

In [14]:
#df["storage_af"] = pd.to_numeric(df["storage_af"])

In [15]:
# details_df["lat"] = details_df["lat"].str.replace("°","")
# details_df["lon"] = details_df["lon"].str.replace("°","")

In [16]:
# Preview the cleaned frame to check the renamed columns.
df

Unnamed: 0,reservoir_id,reservoir_name,date,storage_af
0,MHV,Lake Mohave,1950-02-02,220200.000
1,MHV,Lake Mohave,1950-02-03,224700.000
2,MHV,Lake Mohave,1950-02-04,223500.000
3,MHV,Lake Mohave,1950-02-05,218000.000
4,MHV,Lake Mohave,1950-02-06,219200.000
...,...,...,...,...
109711,HVS,Lake Havasu,2022-07-08,591640.000
109712,HVS,Lake Havasu,2022-07-09,588089.800
109713,HVS,Lake Havasu,2022-07-10,584268.000
109714,HVS,Lake Havasu,2022-07-11,580698.400


### Merge historical average

Create a numeric month column on both tables so the historical averages can be merged by reservoir and month.

In [17]:
# Month number of each reading; used as a join key for the historical averages.
df["month"] = pd.to_datetime(df["date"]).dt.month

In [18]:
# Convert full month names (format '%B') to month numbers so hist_df can be
# merged with df on the numeric "month" column.
# The dtype guard makes this cell safe to re-run: once the column holds numbers,
# it can no longer be parsed with '%B', so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(hist_df["month"]):
    hist_df["month"] = pd.to_datetime(hist_df["month"], format='%B').dt.month

In [19]:
# Attach the historical average for the matching reservoir and month to every reading.
# validate="many_to_one" makes pandas raise if hist_df has more than one row per
# (reservoir_id, month); without it, duplicate keys would silently multiply
# readings and skew everything computed downstream.
merge_historical = pd.merge(
    df[["reservoir_id","date","month","storage_af"]],
    hist_df[["reservoir_id", "month", "average_storage_value", "average_storage_unit"]],
    how="left",
    on=["reservoir_id","month"],
    validate="many_to_one",
)

In [20]:
# Add reservoir metadata (lake name, capacity, coordinates) to every reading, then
# drop the duplicate key column and the unit column, which are not used below.
# validate="many_to_one" makes pandas raise if details_df lists an id more than
# once, instead of silently duplicating that reservoir's readings.
merge_details = pd.merge(
    merge_historical,
    details_df[["id", "lake", "capacity", "lat", "lon"]],
    how="left",
    left_on=["reservoir_id"],
    right_on=["id"],
    validate="many_to_one",
).drop(columns=["id", "average_storage_unit"])

### Calculate percentages

In [21]:
# Current storage as a fraction of capacity (a ratio, not multiplied by 100).
merge_details["current_level_pct_of_total"] = merge_details["storage_af"].div(
    merge_details["capacity"]
)

In [22]:
# Historical average storage as a fraction of capacity.
merge_details["average_level_pct_of_total"] = merge_details["average_storage_value"].div(
    merge_details["capacity"]
)

In [23]:
# Current storage relative to the historical average (1.0 means exactly average).
merge_details["current_level_pct_of_avg"] = merge_details["storage_af"].div(
    merge_details["average_storage_value"]
)

### Check out latest data

Drop rows with no storage value first, so the latest date is taken from rows that actually have a reading.

In [24]:
# Keep only the rows that have a storage value.
drop_na = merge_details[merge_details["storage_af"].notna()]

In [25]:
# Keep only the rows stamped with the most recent date found in drop_na.
# NOTE(review): a reservoir with no row on that date is left out entirely —
# confirm that is intended.
latest_df = drop_na.loc[drop_na["date"].eq(drop_na["date"].max())].copy()

In [26]:
# Show the rows for the most recent date.
latest_df

Unnamed: 0,reservoir_id,date,month,storage_af,average_storage_value,lake,capacity,lat,lon,current_level_pct_of_total,average_level_pct_of_total,current_level_pct_of_avg
26458,MHV,2022-07-12,7,1705575.0,1664313,Lake Mohave,1810000.0,35.197000°,-114.567000°,0.942,0.92,1.025
79114,MEA,2022-07-12,7,7099346.24,15536432,Lake Mead,26159008.0,36.016000°,-114.736000°,0.271,0.594,0.457
109715,HVS,2022-07-12,7,576427.9,582876,Lake Havasu,648000.0,34.317000°,-114.156000°,0.89,0.899,0.989


In [27]:
# Reshape to long form: one row per reservoir and measure, ready for a stacked bar chart.
melt = latest_df.melt(
    id_vars="reservoir_id",
    value_vars=["storage_af", "average_storage_value", "capacity"],
)

In [28]:
# Rank of each measure (1 = first); mapped onto the melted frame and used as the
# chart's `order` encoding.
bar_order = {
    measure: rank
    for rank, measure in enumerate(
        ["storage_af", "average_storage_value", "capacity"], start=1
    )
}

In [29]:
# Attach the stacking rank that corresponds to each row's measure name.
melt["bar_order"] = melt["variable"].map(bar_order)

In [30]:
# Color scale pairs: each measure name with the color it is drawn in.
domain, range_ = map(
    list,
    zip(
        ("capacity", '#ddd'),
        ("average_storage_value", '#83c6e0'),
        ("storage_af", '#1281aa'),
    ),
)

# One normalized stacked bar per reservoir: segments are ordered by "bar_order"
# and colored by measure using the domain/range_ lists.
alt.Chart(
    melt, title="Reservoirs of the lower Colorado River Basin"
).mark_bar().encode(
    x=alt.X("reservoir_id", axis=alt.Axis(labels=False)),
    y=alt.Y("value", stack="normalize"),
    color=alt.Color(
        "variable",
        scale=alt.Scale(domain=domain, range=range_),
        #sort='descending'
    ),
    order="bar_order",
    tooltip=["reservoir_id"],
)

### Export

Trim to the export columns, rename them, and sort by total capacity (largest first).

In [31]:
# Select the export columns, rename three of them, and list the largest
# reservoirs first.
# NOTE(review): lat/lon are passed through unchanged; the latest_df preview shows
# them with a trailing "°" — confirm consumers expect that.
export_df = (
    latest_df.loc[
        :,
        [
            'reservoir_id',
            'lake',
            'date',
            'storage_af',
            'average_storage_value',
            'capacity',
            'current_level_pct_of_total',
            'average_level_pct_of_total',
            'current_level_pct_of_avg',
            'lat',
            'lon',
        ],
    ]
    .rename(
        columns={
            'lake': 'reservoir_name',
            'average_storage_value': 'historical_average',
            'capacity': 'total_capacity',
        }
    )
    .sort_values(by="total_capacity", ascending=False)
)

In [32]:
# Write the latest snapshot to disk without the DataFrame index column.
export_df.to_csv(
    "../data/processed/reservoirs/colorado-river-reservoirs-latest.csv",
    index=False,
)