# Predicting FMC with trained 10h Model

The model was trained on hourly 10-h dead fuel moisture content (FMC) data. Here the trained model is applied to local weather data and its predictions are compared to field observations.

Geographic information is taken from the Mesonet Slapout station: https://www.mesonet.org/about/station-information?stid=slap

## Setup

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from src.models import moisture_rnn as mrnn
from src.utils import read_yml, time_intp, plot_styles, str2time

In [None]:
# Load the artifacts stored under models/: the feature scaler and the
# parameter file, then build the RNN from those parameters. The network
# weights themselves are loaded separately.
scaler = joblib.load("models/scaler.joblib")
params = read_yml("models/params.yaml")
rnn = mrnn.RNN_Flexible(params=params)

In [None]:
# Load the saved weights from models/rnn.keras into the RNN instance.
rnn.load_weights('models/rnn.keras')

## Observed FMC Data

In [None]:
# fm10: field observations (later cells read its `utc_prov`, `date` and
# `fm10` columns).
fm10 = pd.read_excel("data/processed_data/ok_10h.xlsx")

# weather: the weather record used to drive the model (later cells read Ed,
# Ew, solar, wind, rain, hod_utc, doy_utc, date and utc).
weather = pd.read_excel("data/processed_data/dvdk_weather.xlsx")

In [None]:
# Assemble the model input table: time-varying weather columns plus constant
# geographic values for the Slapout station. The scalar entries are broadcast
# by pandas to every row.
X = pd.DataFrame({
    "Ed": weather.Ed,
    "Ew": weather.Ew,
    "solar": weather["solar"],
    "wind": weather["wind"],
    "elev": 774,
    "lon": -100.261920,
    "lat": 36.597490,
    "rain": weather["rain"],
    "hod": weather.hod_utc,
    "doy": weather.doy_utc,
})

# The column order must agree with params["features_list"]. Compare as plain
# lists so that a differing number of columns reports False; the element-wise
# Index comparison raises when the lengths differ.
columns_match = list(X.columns) == list(params["features_list"])
print(f"Columns match: {columns_match}")

In [None]:
# Scale the features with the loaded scaler, then prepend an axis of length 1
# so the full series is presented to the model as a single batch.
XX = np.expand_dims(scaler.transform(X), axis=0)

In [None]:
# Run the model on the single batch and collapse its output to a 1-D array.
raw_preds = rnn.predict(XX)
preds = raw_preds.flatten()

In [None]:
# Summary figures quoted in the results: time span of the observations,
# number of predicted steps, and number of observations.
obs_times = fm10.utc_prov
print(f"Start Time: {obs_times.min()}")
print(f"End Time: {obs_times.max()}")
print(f"N. Hours: {len(preds)}")
print(f"N. Obs: {len(fm10)}")

## Compare to observed

Issues to investigate:
- FMC sensors appear to have a maximal response to rain (see plot `data/SHQC1_0_4.png`).
    - Is this specific to the FTS sensor, or does it affect the Campbell sensor too?
    - Will fine-tuning address this?
- 

In [None]:
# Document-safe plotting defaults shared by the figures below.

# Figure geometry and output resolution.
# NOTE(review): FIGSIZE is not referenced by the plotting cells visible here.
FIGSIZE = (10, 8)
DPI = 300  # used for both figure creation and savefig

# Font sizes.
LABEL_SIZE = 14       # axis labels
TICK_SIZE = 12        # tick labels
CBAR_LABEL_SIZE = 13  # used as the legend font size in the figures below

In [None]:
# Override the legend labels of the observed and modelled series in the
# shared plot_styles mapping (this mutates the imported object in place).
for series_key, legend_text in (
    ("fm", "Observed FM10"),
    ("model", "Predicted FM10"),
):
    plot_styles[series_key]["label"] = legend_text

In [None]:
# One-week window for the zero-shot time-series figure.
start_time = pd.Timestamp("1996-03-27")
end_time = start_time + pd.Timedelta(days=7)

# Build the window mask once and reuse it for the weather rows and for the
# predictions. This assumes `preds` holds one value per row of `weather`.
# The mask is converted to a plain boolean array before indexing the NumPy
# prediction vector.
in_window = (weather.date >= start_time) & (weather.date < end_time)
weather2 = weather[in_window]
p2 = preds[in_window.to_numpy()]
x = weather2.date

# NOTE(review): the observation window includes end_time (<=) while the
# weather window excludes it (<); confirm the inclusive bound is intended.
fm_plt = fm10[(fm10.date >= start_time) & (fm10.date <= end_time)]

fig, ax1 = plt.subplots(dpi=DPI)

# Left-axis series: observations as points, model output and the Ed/Ew
# columns as lines. Everything is drawn on ax1 explicitly rather than through
# pyplot's implicit "current axes".
ax1.scatter(fm_plt.date, fm_plt.fm10, **plot_styles["fm"])
ax1.plot(x, p2, **plot_styles["model"])
ax1.plot(x, weather2.Ed, **plot_styles["Ed"])
ax1.plot(x, weather2.Ew, **plot_styles["Ew"])
ax1.set_ylabel("FMC (%)", fontsize=LABEL_SIZE)
ax1.set_ylim(0, 50)
ax1.tick_params(axis="x", labelrotation=45, labelsize=TICK_SIZE)
ax1.tick_params(axis="y", labelsize=TICK_SIZE)

# Right-axis (rain)
ax2 = ax1.twinx()
ax2.tick_params(axis="y", labelsize=TICK_SIZE, colors="black")
ax2.plot(x, weather2.rain, **plot_styles["rain"])
ax2.set_ylabel("Rain (mm h$^{-1}$)", fontsize=LABEL_SIZE)
ax2.set_ylim(0, 10)
ax2.yaxis.label.set_color("black")

# Shared formatting: no title, legend placed outside the axes on the right.
plt.title(None)
fig.legend(loc="upper left", bbox_to_anchor=(1, 0.9), fontsize=CBAR_LABEL_SIZE)
ax1.grid()
fig.savefig("outputs/ts_rnn_zeroshot.png", dpi=DPI, bbox_inches="tight")

In [None]:
# Load the pickled data object; later cells index it as ml_data[station]["data"].
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
ml_data = pd.read_pickle("models/ml_data.pkl")

In [None]:
st = "BAWC2"  # Station near Green Mtn, SW of Denver

# One-week, timezone-aware (UTC) window: start inclusive, end exclusive.
start_time = pd.Timestamp("2024-05-07", tz="UTC")
end_time = start_time + pd.Timedelta(days=7)

# Pull this station's table and keep only the rows inside the window.
station_data = ml_data[st]["data"]
in_station_window = (station_data.date_time >= start_time) & (
    station_data.date_time < end_time
)
dat = station_data.loc[in_station_window]

print(f"{st=}")
print(f"{start_time=}")
print(f"{end_time=}")
print(f"N. Hours: {len(dat)}")

In [None]:
x = dat["date_time"]

fig, ax1 = plt.subplots(dpi=DPI)

# Left axis: the fm, Ed and Ew columns, each drawn as a line with its shared
# style entry.
for column in ("fm", "Ed", "Ew"):
    ax1.plot(x, dat[column], **plot_styles[column])
ax1.set_ylabel("FMC (%)", fontsize=LABEL_SIZE)
ax1.set_ylim(0, 50)
ax1.tick_params(axis="x", labelrotation=45, labelsize=TICK_SIZE)
ax1.tick_params(axis="y", labelsize=TICK_SIZE)

# Right axis: rain on a twin y-axis with black ticks and label.
ax2 = ax1.twinx()
ax2.tick_params(axis="y", labelsize=TICK_SIZE, colors="black")
ax2.plot(x, dat.rain, **plot_styles["rain"])
ax2.set_ylabel("Rain (mm h$^{-1}$)", fontsize=LABEL_SIZE)
ax2.set_ylim(0, 10)
ax2.yaxis.label.set_color("black")

# Shared formatting: no title, legend placed outside the axes on the right,
# output file named after the station.
plt.title(None)
fig.legend(loc="upper left", bbox_to_anchor=(1, 0.9), fontsize=CBAR_LABEL_SIZE)
ax1.grid()
fig.savefig(f"outputs/ts_{st}.png", dpi=DPI, bbox_inches="tight")

## Calculate Error

RNN predictions fall exactly on the hour, but field observations do not. Linear interpolation is used to align the RNN predictions with the exact times of the observed FMC.

In [None]:
# Interpolate the model output from the weather timestamps onto the exact
# observation timestamps, using the project's time_intp helper.
model_times = weather.utc.to_numpy()
obs_times_utc = fm10.utc_prov.to_numpy()
preds2 = time_intp(t1=model_times, v1=preds, t2=obs_times_utc)

In [None]:
# Visual sanity check of the interpolation on a slice of 336 consecutive
# steps (168 * 2), starting 24 steps into the series.
inds = np.arange(24, 168 * 2 + 24)
dates = weather.date.iloc[inds]

# Attach the interpolated predictions to a copy of the observations so the
# original table is left untouched.
df = fm10.copy()
df["preds"] = preds2
df2 = df[(df.date >= dates.min()) & (df.date <= dates.max())]

plt.plot(dates, preds[inds], color="k", linestyle="dotted", label="Model output")
plt.scatter(df2.date, df2.preds, label="Interpolated", zorder=2)
plt.xticks(rotation=90)
plt.legend()
# FMC is the plotted quantity on the vertical axis; the horizontal axis is
# time, so the FMC label belongs on y.
plt.ylabel("FMC (%)")
plt.title("Forecasted RNN Interpolated to Exact Time of FMC Observation - No Retrain")

In [None]:
# Overall agreement between the observations and the interpolated predictions.
from sklearn.metrics import r2_score

observed, predicted = df.fm10, df.preds

# Bias is observed minus predicted, so a positive value means the model
# under-predicts on average.
print(f"Overall RMSE: {np.sqrt(mean_squared_error(observed, predicted))}")
print(f"Overall Bias: {np.mean(observed - predicted)}")
print(f"Overall R2: {r2_score(observed, predicted)}")

# Observed (x) against predicted (y), with a dashed 1:1 reference line.
plt.scatter(observed, predicted)
plt.grid()
plt.axline((0, 0), slope=1, color="k", linestyle="dashed", zorder=2)
plt.title("Observed vs Predicted FMC")

In [None]:
# Boolean flag per observation: is the observed value at or below 30?
at_or_below_30 = fm10.fm10 <= 30

# The sum of the flags is the count; their mean is the fraction of
# observations at or below 30.
print(f"N. Obs Less than equal to 30: {np.sum(at_or_below_30)}")
print(f"Mean Obs Less than equal to 30: {np.mean(at_or_below_30)}")

In [None]:
# Repeat the accuracy metrics on the subset of lower observed values.
# NOTE(review): this mask is strict (< 30) while the printed labels and the
# count in the preceding cell use "<= 30"; confirm which threshold is intended.
inds = np.where(df.fm10 < 30)[0]

# np.where returns positions, so select with .iloc for every metric. Plain []
# indexing on a Series is label-based and only coincides with positions when
# the index is the default 0..n-1 range.
obs_low = df.fm10.iloc[inds]
pred_low = df.preds.iloc[inds]

print(f"FM<= 30 RMSE: {np.sqrt(mean_squared_error(obs_low, pred_low))}")
print(f"FM<= 30 Bias: {np.mean(obs_low - pred_low)}")
print(f"FM<= 30 R2: {r2_score(obs_low, pred_low)}")

# Observed (x) against predicted (y) for the subset, with a dashed 1:1 line.
plt.scatter(obs_low, pred_low)
plt.axline((0, 0), slope=1, color="k", linestyle="dashed", zorder=2)
plt.grid()
plt.title("Observed vs Predicted (<30% FMC)")