In [1]:
import numpy as np
from datetime import date
import pandas as pd
import altair as alt
import altair_latimes as lat

In [2]:
# Register the theme object exposed by altair_latimes under the name 'latimes',
# then enable it as the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Import

In [3]:
# Statewide snow time series; the "date" column is parsed to datetimes on load.
state_df = pd.read_csv(
    "../../data/raw/snow/statewide-timeseries.csv",
    parse_dates=["date"],
)

In [4]:
# Processed normals table; the cells below rely on its "month" and "day" columns.
normal_df = pd.read_csv(filepath_or_buffer="../../data/processed/snow/normal-swc.csv")

### Clean

Drop the leap day (Feb. 29) from the normals

In [5]:
# Keep every row of the normals except the one for Feb. 29.
normal_df = normal_df.loc[
    (normal_df["month"] != 2) | (normal_df["day"] != 29)
]

In [6]:
# Turn month and day into two-character, zero-padded strings ("2" -> "02").
normal_df = normal_df.assign(
    month=normal_df["month"].astype(str).str.zfill(2),
    day=normal_df["day"].astype(str).str.zfill(2),
)

In [7]:
# "MM-DD" join key for the normals, built from the string month and day columns.
normal_df["month-day"] = normal_df["month"].str.cat(normal_df["day"].astype(str), sep="-")

In [8]:
# Zero-padded "MM-DD" join key taken straight from the parsed dates.
# strftime yields a missing value for NaT rows instead of a bogus key such as
# "nan-nan", which the manual month/day string padding would produce.
state_df["month-day"] = state_df["date"].dt.strftime("%m-%d")

### Get current water year

In [9]:
def getWaterYear(date):
    """Return the water year that contains ``date``.

    Dates in October through December are counted toward the following
    calendar year; January through September stay in their own calendar year.

    Parameters
    ----------
    date : object with integer ``month`` and ``year`` attributes
        For example ``datetime.date``, ``datetime.datetime`` or
        ``pandas.Timestamp``.

    Returns
    -------
    int
        ``date.year + 1`` when ``date.month >= 10``, otherwise ``date.year``.
    """
    # Only the month and year matter; the day of the month never changes the result.
    if date.month >= 10:
        return date.year + 1
    return date.year

In [10]:
# Anchor the notebook to the run date; the current water year is derived from it,
# so results depend on the day the notebook is executed.
today = date.today()

In [11]:
# Water year containing today's date (Oct-Dec roll forward to the next year).
current_water_year = getWaterYear(today)
# Bare expression so the notebook displays the value.
current_water_year

2024

### Merge onto a daily series covering the entire water year

In [12]:
# Calendar year in which the current water year begins, kept as a string.
wy_start = f"{current_water_year - 1}"

In [13]:
# First day of the water year: Oct. 1 of the starting calendar year.
start = pd.to_datetime(f"{wy_start}-10-01")

In [14]:
# Last day of the water year: Sept. 30 of the water-year label itself.
end = pd.to_datetime(f"{current_water_year}-09-30")

In [15]:
# Sanity check: show the computed water-year bounds.
print(f"{start} to {end}")

2023-10-01 00:00:00 to 2024-09-30 00:00:00


In [16]:
# One row per calendar day from the water-year start through its end.
dates = pd.Series(pd.date_range(start, end, freq="D"))

# Running day count (1.0, 2.0, ...). `.dt.days` replaces astype("timedelta64[D]"),
# which pandas >= 2.0 rejects (only s/ms/us/ns resolutions are supported).
# NOTE(review): `days` is not referenced by any cell visible here; kept in case
# something outside this view uses it.
days = dates.diff().dt.days.fillna(1).cumsum()

# Build the frame with the date column in place instead of round-tripping the
# dates through the index.
wy_df = pd.DataFrame({
    "date": dates,
    "year": dates.dt.year,
    "month": dates.dt.month,
    # Zero-padded "MM-DD" key used to join against the other frames.
    "month-day": dates.dt.strftime("%m-%d"),
})
wy_df

Unnamed: 0,date,year,month,month-day
0,2023-10-01,2023,10,10-01
1,2023-10-02,2023,10,10-02
2,2023-10-03,2023,10,10-03
3,2023-10-04,2023,10,10-04
4,2023-10-05,2023,10,10-05
...,...,...,...,...
361,2024-09-26,2024,9,09-26
362,2024-09-27,2024,9,09-27
363,2024-09-28,2024,9,09-28
364,2024-09-29,2024,9,09-29


In [17]:
# Left join keeps every day of the water year, including days that have no
# observation in state_df.
merge = wy_df.merge(
    state_df,
    on=["date", "month-day"],
    how="left",
)

In [18]:
# Attach the normals by calendar day. The normals' own month/day columns are
# dropped first so only the "month-day" key and the value columns come across.
merge_normals = merge.merge(
    normal_df.drop(columns=["month", "day"]),
    on="month-day",
    how="left",
)

In [19]:
# Preview the first rows of the merged frame to eyeball the join result.
merge_normals.head()

Unnamed: 0,date,year,month,month-day,station_id,snwcavg,pctnorm,pctapr1,avg,wmonth,norm,avgAvgSwc
0,2023-10-01,2023,10,10-01,,,,,0.0,1.0,0.0,-0.01
1,2023-10-02,2023,10,10-02,,,,,0.0,1.0,0.03,0.0
2,2023-10-03,2023,10,10-03,,,,,0.0,1.0,0.0,-0.01
3,2023-10-04,2023,10,10-04,,,,,0.0,1.0,0.0,-0.01
4,2023-10-05,2023,10,10-05,,,,,0.001,1.0,0.0,0.0


In [20]:
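# Stale scratch check (not executed): it references an `avg_pct_apr1` column and unpadded month-day keys such as "1-20", neither of which matches merge_normals as built above.
#(merge_normals.loc[ (merge_normals['month-day'] == "1-20") ]['avg_pct_apr1'].sum()+merge_normals.loc[ (merge_normals['month-day'] == "1-22") ]['avg_pct_apr1'].sum())/2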

In [21]:
# Scale both columns by 100. This cell is not idempotent: re-running it
# without rebuilding merge_normals multiplies the values again.
merge_normals["pctapr1"] = merge_normals["pctapr1"].mul(100)
merge_normals["pctnorm"] = merge_normals["pctnorm"].mul(100)

In [22]:
# Scale the normals' "avg" column by 100 as well (same caveat: re-running this
# cell scales it again).
merge_normals["avg"] = merge_normals["avg"].mul(100)

In [23]:
# Long format for charting: one row per (date, variable) with the number in "value".
melt = merge_normals.melt(
    id_vars="date",
    value_vars=["pctapr1", "avg"],
)

In [24]:
# Plot from the start of the current water year. The cutoff is the computed
# `start` rather than a hard-coded date, so it cannot go stale when the water
# year rolls over.
alt.Chart(melt[melt.date >= start]).mark_line().encode(
    x="date",
    y="value",
    color="variable",
    tooltip=["date","value"]
).properties(title="Snowpack as percentage of peak",width=600)

### Trim and rename columns

In [25]:
# Keep only the columns that go into the exported file, in output order.
trim_df = merge_normals.loc[
    :,
    ["date", "snwcavg", "pctnorm", "pctapr1", "avg", "avgAvgSwc"],
]

In [26]:
# Map the source column names to the names used in the published CSV.
rename_df = trim_df.rename(
    columns=dict(
        snwcavg="swc_avg",
        pctnorm="pct_normal",
        pctapr1="pct_apr_1",
        avg="apr_1_baseline_pct",
        avgAvgSwc="apr_1_baseline_value",
    )
)

In [27]:
# Write the trimmed, renamed table to the processed-data folder; index=False
# keeps the row index out of the file.
rename_df.to_csv("../../data/processed/snow/snow-water-equivalent.csv", index=False)