In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pathlib as pl
from scipy.stats import norm

In [None]:
# Simulation start and the three period boundaries, all at midnight.
start_date = pd.Timestamp("1962-01-01 00:00:00")

# Ordered ends of the calibration period and the two periods that follow it.
end_periods = [
    pd.Timestamp(stamp)
    for stamp in (
        "2006-01-01 00:00:00",
        "2016-01-01 00:00:00",
        "2026-01-01 00:00:00",
    )
]
end_calibration, end_period_two, end_period_three = end_periods

In [None]:
# Elapsed days (as floats) from start_date to the end of each period.
totim_end = [float((period_end - start_date).days) for period_end in end_periods]
totim_end

In [None]:
# Read the station CSV (first line skipped, first column parsed as the date
# index) and keep only the records on or after start_date.
raw_df = pd.read_csv(
    "data/USW00093738.csv",
    skiprows=1,
    parse_dates=True,
    index_col=0,
)
df = raw_df[raw_df.index >= start_date]

In [None]:
# Columns that are removed before the rest of the analysis.
drop_cols = [
    "TAVG (Degrees Fahrenheit)",
    "TMAX (Degrees Fahrenheit)",
    "TMIN (Degrees Fahrenheit)",
    "SNOW (Inches)",
    "SNWD (Inches)",
]
df = df.drop(columns=drop_cols)

In [None]:
# Display the frame that remains after the column drop.
df

In [None]:
# Replace every missing value with zero.
df = df.fillna(0)

In [None]:
# List the remaining column labels.
df.columns

In [None]:
# Column label used for the aggregations below, followed by a display of
# that column.
tag = "PRCP (Inches)"
df[tag]

In [None]:
# Line plot of the selected column at its native time step.
df[tag].plot()

In [None]:
# Calendar-year totals (labelled at year end) and the mean annual total over
# the calibration window.
annual_df = df.resample("YE").sum()
calibration_years = annual_df.loc[start_date:end_calibration, tag]
annual_calibration_mean = calibration_years.mean()
print(f"Calibration mean: {annual_calibration_mean}\n{annual_df}")

In [None]:
# Annual totals from end_period_two onward.
annual_df.loc[end_period_two:]

In [None]:
# Bar chart of the annual totals.
annual_df.plot.bar()

In [None]:
# Calendar-month totals (labelled at month end) and the mean monthly total
# over the calibration window; twelve times that mean is printed as the
# equivalent annual value.
monthly_df = df.resample("ME").sum()
calibration_months = monthly_df.loc[start_date:end_calibration, tag]
monthly_calibration_mean = calibration_months.mean()
print(
    f"Monthly Calibration mean: {monthly_calibration_mean}\n"
    f"Annual Calibration mean: {monthly_calibration_mean * 12}\n"
    f"{monthly_df}"
)

In [None]:
# Relative departure of each month from the mean month of its calendar year,
# with the sign flipped: months below the yearly mean get a positive
# fraction and months above it a negative one.
yearly_mean_month = monthly_df[tag].resample("YE").transform("mean")
monthly_df["monthly fraction"] = (
    -(monthly_df[tag] - yearly_mean_month) / yearly_mean_month
)

# The fraction is only kept between end_calibration and end_period_three.
outside_window = (monthly_df.index <= end_calibration) | (
    monthly_df.index > end_period_three
)
monthly_df.loc[outside_window, "monthly fraction"] = 0.0

monthly_df

In [None]:
# Monthly rows later than one year before end_period_three.
monthly_df[monthly_df.index > end_period_three - pd.DateOffset(years=1)]

In [None]:
# Column sums over the rows later than one year before end_period_three.
monthly_df[monthly_df.index > end_period_three - pd.DateOffset(years=1)].sum()

In [None]:
# Twelve monthly weights taken from a normal distribution centred on month 6:
# each weight is the probability mass between consecutive integer month
# boundaries 0..12.
mean = 6
std_dev = 1.5
months = list(range(13))
cdf_value = norm.cdf(months, loc=mean, scale=std_dev)
et_fraction_values = cdf_value[1:] - cdf_value[:-1]
et_fraction_values, et_fraction_values.sum()

In [None]:
# Plot the twelve monthly weights against month indices 0..11.
plt.plot(months[:-1], et_fraction_values)

In [None]:
# Lake ET column: a fraction of each calendar year's PRCP total, shaped by
# the monthly weights and divided by the number of days in the month.
et_fraction = 0.7365  # target 0.7512
annual_lake_et = (
    monthly_df["PRCP (Inches)"].resample("YS").transform("sum") * et_fraction
)

# Per-row month weight and month length, looked up from the month-end index.
month_weight = et_fraction_values[(monthly_df.index.month - 1).to_numpy()]
month_length = monthly_df.index.days_in_month.to_numpy()

# NOTE(review): the annual term enters twice (the value is multiplied by an
# expression that already contains it), so the result is quadratic in the
# scaled annual total. et_fraction looks tuned against this form -- confirm
# before changing either the formula or the constant.
monthly_df["lake et (inches)"] = annual_lake_et * (
    annual_lake_et * month_weight / month_length
)

In [None]:
# Ratio of mean lake ET to mean PRCP over the calibration rows; compare with
# the target noted beside et_fraction.
calibration_rows = monthly_df.index <= end_calibration
(
    monthly_df.loc[calibration_rows, "lake et (inches)"].mean()
    / monthly_df.loc[calibration_rows, "PRCP (Inches)"].mean()
)

In [None]:
# Largest and smallest lake ET values.
lake_et = monthly_df["lake et (inches)"]
lake_et.max(), lake_et.min()

In [None]:
# Land ET column: a fraction of each calendar year's PRCP total, shaped by
# the monthly weights and divided by the number of days in the month.
et_fraction = 0.6295  # target 0.5488
annual_land_et = (
    monthly_df["PRCP (Inches)"].resample("YS").transform("sum") * et_fraction
)

# Per-row month weight and month length, looked up from the month-end index.
month_weight = et_fraction_values[(monthly_df.index.month - 1).to_numpy()]
month_length = monthly_df.index.days_in_month.to_numpy()

# NOTE(review): the annual term enters twice (the value is multiplied by an
# expression that already contains it), so the result is quadratic in the
# scaled annual total. et_fraction looks tuned against this form -- confirm
# before changing either the formula or the constant.
monthly_df["land et (inches)"] = annual_land_et * (
    annual_land_et * month_weight / month_length
)

In [None]:
# Ratio of mean land ET to mean PRCP over the calibration rows; compare with
# the target noted beside et_fraction.
calibration_rows = monthly_df.index <= end_calibration
(
    monthly_df.loc[calibration_rows, "land et (inches)"].mean()
    / monthly_df.loc[calibration_rows, "PRCP (Inches)"].mean()
)

In [None]:
# Largest and smallest land ET values.
land_et = monthly_df["land et (inches)"]
land_et.max(), land_et.min()

In [None]:
# Side-by-side bars of PRCP and lake ET for the year ending at
# end_period_three.
final_year = slice(end_period_three - pd.DateOffset(years=1), end_period_three)
ax = monthly_df.loc[final_year, "PRCP (Inches)"].plot(kind="bar", position=1)
monthly_df.loc[final_year, "lake et (inches)"].plot(
    kind="bar", ax=ax, position=0, color="red"
)

In [None]:
# Side-by-side bars of PRCP and land ET for the year ending at
# end_period_three.
final_year = slice(end_period_three - pd.DateOffset(years=1), end_period_three)
ax = monthly_df.loc[final_year, "PRCP (Inches)"].plot(kind="bar", position=1)
monthly_df.loc[final_year, "land et (inches)"].plot(
    kind="bar", ax=ax, position=0, color="red"
)

In [None]:
# Ratio of mean lake ET to mean PRCP over the rows after end_calibration.
post_calibration = monthly_df.index > end_calibration
(
    monthly_df.loc[post_calibration, "lake et (inches)"].mean()
    / monthly_df.loc[post_calibration, "PRCP (Inches)"].mean()
)

In [None]:
# Net recharge columns: the PRCP column minus the matching ET column.
precipitation = monthly_df[tag]
monthly_df["netrch lake (inches)"] = precipitation - monthly_df["lake et (inches)"]
monthly_df["netrch land (inches)"] = precipitation - monthly_df["land et (inches)"]
monthly_df

In [None]:
# Base rates for the two series and the share of each rate that does not
# vary with the monthly fraction.
reilly, virginia_city = -67000.0, -268000.0
pws_fraction = 0.75

# Each series is its base rate plus the remaining share scaled by the monthly
# fraction; rows up to and including end_calibration are set to zero.
calibration_rows = monthly_df.index <= end_calibration
for column_name, base_rate in (("reilly", reilly), ("vc", virginia_city)):
    monthly_df[column_name] = (
        base_rate
        + (1.0 - pws_fraction) * base_rate * monthly_df["monthly fraction"]
    )
    monthly_df.loc[calibration_rows, column_name] = 0.0

In [None]:
# Mean of the "reilly" column over the rows after end_calibration.
monthly_df.loc[monthly_df.index > end_calibration, "reilly"].mean()

In [None]:
# PRCP bars next to the sign-flipped "reilly" series (on a secondary axis)
# for the year ending at end_period_three.
final_year = slice(end_period_three - pd.DateOffset(years=1), end_period_three)
ax = monthly_df.loc[final_year, "PRCP (Inches)"].plot(kind="bar", position=1)
reilly_flipped = monthly_df.loc[final_year, "reilly"] * -1.0
reilly_flipped.plot(kind="bar", ax=ax, secondary_y=True, position=0, color="red")

In [None]:
# Mean of the "vc" column over the rows after end_calibration.
monthly_df.loc[monthly_df.index > end_calibration, "vc"].mean()

In [None]:
# PRCP bars next to the sign-flipped "vc" series (on a secondary axis) for
# the year ending at end_period_three.
final_year = slice(end_period_three - pd.DateOffset(years=1), end_period_three)
ax = monthly_df.loc[final_year, "PRCP (Inches)"].plot(kind="bar", position=1)
vc_flipped = monthly_df.loc[final_year, "vc"] * -1.0
vc_flipped.plot(kind="bar", ax=ax, secondary_y=True, position=0, color="red")

In [None]:
# Display the monthly table with all derived columns.
monthly_df

### Write discretized temporal data

In [None]:
# Every monthly column except those whose label contains "monthly fraction".
columns = [
    name for name in monthly_df.columns if "monthly fraction" not in name
]

In [None]:
# Aggregate the monthly table into stress periods and write one parquet file
# per sampling frequency (data/temporal_data_annual.parquet and
# data/temporal_data_monthly.parquet).
#
# The first stress period covers start_date -> end_calibration. After that,
# periods advance one calendar year ("annual") or one calendar month
# ("monthly") at a time until end_period_three is reached.
#
# For every stress period:
#   * "reilly" and "vc" hold the mean of the monthly values in the period;
#   * every other column holds the period sum divided by the period length
#     in days;
#   * "perlen" holds the period length in days.
period_steps = {
    "annual": pd.DateOffset(years=1),
    "monthly": pd.DateOffset(months=1),
}
mean_columns = ("reilly", "vc")

for sample_frequency in ("annual", "monthly"):
    if sample_frequency not in period_steps:
        raise ValueError(
            f"sample_frequency ('{sample_frequency}') must be 'annual' or 'monthly'"
        )
    period_step = period_steps[sample_frequency]

    # Stress-period boundaries: period i runs from spd_index[i] to
    # spd_index[i + 1].
    spd_index = [start_date, end_calibration]
    on_date = end_calibration
    while on_date < end_period_three:
        on_date = on_date + period_step
        spd_index.append(on_date)

    # Period lengths come straight from the boundaries, so perlen always has
    # exactly one entry per row of spd_df.
    periods = list(zip(spd_index[:-1], spd_index[1:]))
    perlen = [(t1 - t0).days for t0, t1 in periods]

    # Build one record per stress period and construct the frame once.
    # (Writing through spd_df[column].iloc[idx] is chained assignment, which
    # does not update the frame when pandas Copy-on-Write is active.)
    records = []
    for (t0, t1), ndays in zip(periods, perlen):
        # The label slice is inclusive at both ends, but monthly_df is
        # indexed by month-end dates while the boundaries fall on the first
        # of a month, so no row is shared between periods.
        window = monthly_df.loc[t0:t1, columns]
        period_sum = window.sum()
        period_mean = window.mean()
        records.append(
            {
                column: (
                    period_mean[column]
                    if column in mean_columns
                    else period_sum[column] / ndays
                )
                for column in columns
            }
        )

    # Rows are labelled with the end date of each stress period.
    spd_df = pd.DataFrame(records, index=spd_index[1:], columns=columns)
    spd_df["perlen"] = perlen

    path = pl.Path(f"data/temporal_data_{sample_frequency}.parquet")
    spd_df.to_parquet(path)

In [None]:
# Display the last stress-period table built by the loop above (the
# "monthly" one, since it is processed last).
spd_df