## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from src.utils import time_intp, plot_styles, time_range
from src.reproducibility import set_seed
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Read the processed weather table and the 1h / 10h / 100h observation tables
# (in that order) from their Excel files.
weather, fm1, fm10, fm100 = (
    pd.read_excel(xlsx_path)
    for xlsx_path in (
        "data/processed_data/mesonet.xlsx",
        "data/processed_data/ok_1h.xlsx",
        "data/processed_data/ok_10h.xlsx",
        "data/processed_data/ok_100h.xlsx",
    )
)

## Data Processing

Round the fuel moisture (FM) observation times to the nearest half hour for now.

Find a train/test period with no missing weather data and no rain

In [None]:
def _naive_to_utc(dates, source_tz):
    """Return the datetime Series `dates` as tz-aware UTC timestamps.

    Naive timestamps are interpreted as `source_tz` wall-clock time and then
    converted to UTC. Input that is already tz-aware is only converted, so
    re-running this cell does not fail on a second `tz_localize`.
    """
    if dates.dt.tz is None:
        dates = dates.dt.tz_localize(source_tz)
    return dates.dt.tz_convert("UTC")


# Fuel-moisture observation times: localize as 'Etc/GMT+6' (a fixed offset of
# UTC-6; the sign in "Etc/GMT" zone names is inverted), convert to UTC, then
# round to the nearest half hour.
for _fm in (fm1, fm10, fm100):
    _fm["date"] = _naive_to_utc(_fm["date"], "Etc/GMT+6").dt.round("30min")

# Weather timestamps are localized as UTC directly (no rounding applied).
weather["date"] = _naive_to_utc(weather["date"], "UTC")

In [None]:
# Alternative split derived from the 10h observations instead of fixed dates:
# train_start = fm10.date.min()
# train_end = train_start + pd.Timedelta(hours=24*7)
# test_end = fm10.date.max()

# One week of training data followed immediately by one week of test data.
train_start = pd.to_datetime('1996-04-07 12:00:00+00:00')
train_end = train_start + pd.Timedelta(hours=24*7)
test_end = train_end + pd.Timedelta(hours=24*7)

# Weather
# Train window is closed on both ends; test window excludes train_end so the
# two sets do not share a row.
w_train = weather[(weather.date >= train_start) & (weather.date <= train_end)].reset_index(drop=True)
w_test = weather[(weather.date > train_end) & (weather.date <= test_end)].reset_index(drop=True)

# A row is counted as missing when ANY of rain / Ed / Ew is NaN. Combining the
# masks with `&` would only count rows where all three are missing at once and
# would under-report gaps in a single variable.
n_missing = int(w_train[["rain", "Ed", "Ew"]].isna().any(axis=1).sum())
print(f"Number missing Eq/rain: {n_missing}")
print(f"Max rain: {w_train.rain.max()}")

# FMC Observations (10h series is split into train/test)
fm_train = fm10[(fm10.date >= train_start) & (fm10.date <= train_end)].reset_index(drop=True)
fm_test = fm10[(fm10.date > train_end) & (fm10.date <= test_end)].reset_index(drop=True)

# 1h and 100h observations over the training window only
fm1_train = fm1[(fm1.date >= train_start) & (fm1.date <= train_end)].reset_index(drop=True)
fm100_train = fm100[(fm100.date >= train_start) & (fm100.date <= train_end)].reset_index(drop=True)

In [None]:
# Training-window weather series, one line per column, each drawn with its
# entry from the shared plot_styles table.
for w_col in ("Ed", "Ew", "rain"):
    plt.plot(w_train.date, w_train[w_col], **plot_styles[w_col])
_ = plt.xticks(rotation=90)
# Overlay the 10h observations as points.
plt.scatter(fm_train.date, fm_train.fm10, **plot_styles["fm"])
plt.legend()

## Define 10h Model

Simple Neural ODE
- Single Eq input: averaging Ed and Ew for now
- Time grid: hourly from start fm time (will evaluate sub-hourly later)
- Architecture: 2 inputs (1 weather, 1 state) feeding a single linear unit that serves as both the hidden layer and the output

Using TensorFlow and manually implemented ODE solvers for now. For the real model, suitable tools already exist in PyTorch.

In [None]:
# Physics-based initial weights: with these values the linear unit evaluates
# dm/dt = (E - m) / T_k.
T_k = 10          # timelag class
W_x = 1 / T_k     # weight on the weather input
W_h = -1 / T_k    # weight on the state
b = 0             # no bias


# Initial state: first 10h observation in the training window
m0 = fm_train["fm10"].iloc[0]
print(f"Initial State: {m0}")

# Hourly time grid from the first FM observation to the last weather record
tgrid = time_range(fm_train["date"].iloc[0], w_train["date"].iloc[-1], freq="1h")
print(f"Number of times: {tgrid.shape[0]}")
# Entry 0 of the unique diffs is skipped (the first diff of a series is NaT);
# only the first remaining value is printed.
weather_lags = w_train["date"].diff().unique()
print(f"Unique Time lags in weather: {weather_lags[1:][0]}")

# Put the weather on the grid and build the single equilibrium input E as the
# plain average of Ed and Ew.
w_inputs = w_train.set_index("date").reindex(tgrid).reset_index()
w_inputs["E"] = (w_inputs["Ed"] + w_inputs["Ew"]) / 2
X = w_inputs["E"].to_numpy()

In [None]:
# The network input is the pair [h, x]: the current state and one weather variable.
inputs = layers.Input(shape=(2,))
# One dense unit with a linear (identity) activation -- no ReLU is applied.
output = layers.Dense(1, activation="linear")(inputs)
f_theta = models.Model(inputs=inputs, outputs=output)

# Replace the dense layer's initial weights with the physics-based values.
# Kernel rows follow the input order [h, x]; the bias has a single entry.
kernel_init = np.array([[W_h], [W_x]])
bias_init = np.array([b])
f_theta.layers[1].set_weights([kernel_init, bias_init])

In [None]:
# Print the layer / parameter summary of f_theta.
f_theta.summary()

In [None]:
# Evaluate the network once at the initial condition: a single-row float32
# batch [[h, x]] built from the initial state m0 and the first input X[0].
input_0 = np.array([[m0, X[0]]], dtype=np.float32)
dhdt_0 = f_theta(input_0)

In [None]:
# Show the network output for the initial input [[m0, X[0]]].
print(f"Derivative at time 0: {dhdt_0}")

In [None]:
# Manual check: dhdt = (E - m0) / T_k with E = X[0]; this should match the
# network's derivative at time 0 up to rounding error.
print(f"Manual calc: {(X[0] - m0)/T_k}")

In [None]:
def forward_euler(h, x, f, dt, alpha=1):
    """
    Advance the state by one explicit (forward) Euler step.

    h     : current state (scalar or 1-D array-like)
    x     : input at the current time (scalar or 1-D array-like)
    f     : neural network called on a single-row batch [[h, x]]; its result
            must expose .numpy() returning an array indexable as [0][0]
    dt    : step size
    alpha : time scaling parameter multiplying the network output

    Returns the 1-D array h + dt * alpha * f([[h, x]]).
    """
    state = np.atleast_1d(h).astype(np.float32)
    forcing = np.atleast_1d(x).astype(np.float32)
    # Single-row batch: state entries first, then input entries.
    batch = np.concatenate((state, forcing))[np.newaxis, :]
    # Scalar derivative taken from batch row 0, output unit 0, scaled by alpha.
    rate = alpha * f(batch).numpy()[0][0]
    return state + dt * rate

In [None]:
def forward_euler2(h, x, dt, timelag=None):
    """
    One forward Euler step of the analytic time-lag ODE dh/dt = (x - h) / timelag.

    No neural network is involved: the right-hand side is evaluated directly,
    which makes this a reference for checking the network-based solver.

    h       : current state (scalar or 1-D array-like)
    x       : input the state relaxes toward (scalar or 1-D array-like)
    dt      : step size, in the same time unit as `timelag`
    timelag : time-lag constant of the ODE; when None (the default) the
              notebook-level T_k is used, which is the original behavior

    Returns the 1-D float32 array h + dt * (x - h) / timelag.
    """
    if timelag is None:
        timelag = T_k  # fall back to the notebook-level timelag class
    h = np.atleast_1d(h).astype(np.float32)
    x = np.atleast_1d(x).astype(np.float32)
    fhx = (x - h) / timelag
    return h + dt * fhx

In [None]:
# Integrate both formulations over the hourly grid from the same initial state:
#   m  -> analytic right-hand side (forward_euler2)
#   mm -> network right-hand side (forward_euler with f_theta)
n_times = tgrid.shape[0]
m = np.zeros(n_times)
mm = np.zeros(n_times)
m0 = fm_train.fm10.iloc[0]  # restated so the cell reads on its own
m[0] = mm[0] = m0
for i in range(n_times - 1):
    Xi = X[i]
    mm[i+1] = forward_euler(mm[i], Xi, f_theta, dt=1)[0]
    m[i+1] = forward_euler2(m[i], Xi, dt=1)[0]

$$
m(t+\Delta t) = m(t) + \Delta t \, f(m(t), E(t))
$$

$$
f(m(t), E(t)) = \frac{dm}{dt} = \frac{E(t) - m(t)}{T_k}
$$

In [None]:
plot_dates = w_inputs.date
# NOTE(review): X is the Ed/Ew average but is drawn with the "Ew" entry of
# plot_styles -- confirm the resulting style/label is the intended one.
plt.plot(plot_dates, X, **plot_styles["Ew"])
# Network-driven solution and the analytic forward Euler reference.
plt.plot(plot_dates, mm, label="NODE", color="#468a29")
plt.plot(plot_dates, m, label="Manual F.E.", color="k", linestyle="--")
# 10h observations for comparison.
plt.scatter(fm_train.date, fm_train.fm10, **plot_styles["fm"])
_ = plt.xticks(rotation=90)
plt.legend()

## Time warp to 1h/100h

In [None]:
# plt.plot(w_inputs.date, X, **plot_styles["Ew"])
# Observed series for the three classes over the training window, described
# as (frame, value column, line keywords) and drawn in this order.
observed_series = (
    (fm_train, "fm10", dict(color='k', linestyle="dashed", label="10h", marker='o')),  # circle
    (fm1_train, "fm1", dict(color="#468a29", label="1h", marker='^')),                 # triangle
    (fm100_train, "fm100", dict(color="#25D902", label="100h", marker='s')),           # square
)
for obs, value_col, line_kw in observed_series:
    plt.plot(obs.date, obs[value_col], markersize=5, **line_kw)
_ = plt.xticks(rotation=90)
plt.legend()
plt.grid()

In [None]:
# 1h / 100h: reuse the 10h network and rescale its derivative through alpha
# (10 for the 1h class, 1/10 for the 100h class).
# NOTE(review): with dt=1 and alpha=10, and while f_theta still holds the
# physics-initialised weights, the 1h update reduces to m1[i+1] == X[i] --
# confirm this step size is acceptable for the 1h class.
n_grid = tgrid.shape[0]
m1, m100 = np.zeros(n_grid), np.zeros(n_grid)
m1[0] = fm1_train.fm1.iloc[0]
m100[0] = fm100_train.fm100.iloc[0]
for i in range(n_grid - 1):
    Xi = X[i]
    m100[i+1] = forward_euler(m100[i], Xi, f_theta, dt=1, alpha=1/10)[0]
    m1[i+1] = forward_euler(m1[i], Xi, f_theta, dt=1, alpha=10)[0]

In [None]:
# plt.plot(w_inputs.date, X, color="blue", alpha=0.5)
# Simulated series for the three classes, described as (values, line keywords)
# and drawn in this order on the shared hourly grid.
model_runs = (
    (mm, dict(color="k", label="10h")),
    (m1, dict(color="#468a29", linestyle="dotted", label="1h")),
    (m100, dict(color="#25D902", linestyle="dashed", label="100h")),
)
for series, line_kw in model_runs:
    plt.plot(w_inputs.date, series, **line_kw)
_ = plt.xticks(rotation=90)
plt.legend()
plt.grid()