# Fine-Tuning RNN

The RNN was trained on hourly FM10 sensor data, and we used `return_sequences=True` to compute the loss across the whole forecast window.

In the case of the Oklahoma field data, the fuel moisture samples are taken twice daily, but the weather is still hourly. In order to fine-tune, we will rebuild the architecture in a different network and mask out the response vector to calculate the loss on the last time step of the sequences, while still outputting the whole 48-hour sequence.

Further, this architecture can be fit directly with no pretraining as the no-transfer baseline. We define a new loss that masks missing values of FMC. We construct the same 48-hour input sequences, and a 48-hour FMC sequence that has ~4 non-missing values for twice-daily data.

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from src.models.moisture_rnn import RNN_Flexible, build_training_batches_univariate
from src.utils import read_yml, time_intp, plot_styles, Dict

In [None]:
# Load the saved model artifacts: the parameter file (YAML) and the scaler
# object (joblib). Only these two files are read here; no network weights are
# loaded in this cell.
params = read_yml("models/params.yaml")
scaler = joblib.load("models/scaler.joblib")

In [None]:
# Project configuration. Later cells read the site constants
# (ok_elev, ok_lon, ok_lat) and the train/val/test time bounds from it.
conf = Dict(read_yml("etc/thesis_config.yaml"))

In [None]:
# Weather table; joined on its "utc" column further down.
weather = pd.read_excel("data/processed_data/dvdk_weather.xlsx")
# Fuel moisture table; the columns "utc_rounded", "utc_prov" and "fm100"
# are the ones used in the join further down.
fm = pd.read_excel("data/processed_data/ok_100h.xlsx")

## Masked Loss Function

Missing values in the response vector are filled with the sentinel value -9999, which must be masked out in the loss calculation.

### Compile Model

In [None]:
@tf.keras.utils.register_keras_serializable(package="custom")
def mse_masked(y_true, y_pred, mask_val=-9999):
    """
    Masked MSE: ignores targets equal to ``mask_val``.

    Args:
        y_true: Target values. Entries equal to ``mask_val`` are treated as
            missing and contribute nothing to the loss.
        y_pred: Predicted values. Expected to have the same shape as
            ``y_true``, e.g. (batch, T, 1).
        mask_val: Sentinel that marks a missing target. Defaults to -9999.

    Returns:
        Scalar float32 tensor: the sum of squared errors over the non-masked
        entries divided by the number of non-masked entries. When every
        target is masked the denominator is clamped to 1, so no division by
        zero occurs.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    mask_val = tf.cast(mask_val, tf.float32)

    mask = tf.not_equal(y_true, mask_val)          # bool: True where a target exists
    mask_f = tf.cast(mask, tf.float32)             # 0/1 weights

    # Replace masked targets with predictions so their squared error is exactly 0
    # (instead of the huge value the sentinel itself would produce).
    y_true_clean = tf.where(mask, y_true, y_pred)

    sqe = tf.square(y_true_clean - y_pred)         # masked positions are 0
    sqe = sqe * mask_f                             # redundant with the line above; kept for clarity

    # Average over observed targets only; clamp to avoid divide-by-zero when
    # the whole batch is masked.
    denom = tf.maximum(tf.reduce_sum(mask_f), 1.0)

    return tf.reduce_sum(sqe) / denom

In [None]:
# Build the network from the loaded params, with the masked MSE as its loss so
# that sentinel-filled targets are ignored. Fixed random_state for repeatability.
rnn = RNN_Flexible(params=params, random_state=42, loss=mse_masked)

## Join and Split into Train/Val/Test

In [None]:
# Left-join the fuel moisture observations onto the weather rows, add the
# constant site columns, and flag hours without an observation with -9999.
fm_obs = fm[["utc_rounded", "utc_prov", "fm100"]]
df = pd.merge(
    weather,
    fm_obs,
    how="left",
    left_on="utc",
    right_on="utc_rounded",
)
# The join key from the right-hand table duplicates "utc", so drop it.
df = df.drop(columns="utc_rounded")

# Site constants from the config, repeated on every row.
df["elev"] = conf.ok_elev
df["lon"] = conf.ok_lon
df["lat"] = conf.ok_lat

# Weather rows with no matching observation get the sentinel value.
df = df.fillna({"fm100": -9999})
df[["utc", "utc_prov", "fm100", "lon", "lat", "elev"]].head(5)

In [None]:
# Split times
def _split_by_time(start, end):
    """Return (feature frame, fm100 array) for rows with start <= utc <= end."""
    in_window = df.utc.between(start, end)  # inclusive on both ends
    features = df.loc[in_window, params['features_list']]
    target = df.loc[in_window, "fm100"].to_numpy()
    return features, target


X_train, y_train = _split_by_time(conf.train_start, conf.train_end)
X_val, y_val = _split_by_time(conf.val_start, conf.val_end)
X_test, y_test = _split_by_time(conf.f_start, conf.f_end)

# Report the size of each split.
print(f"{X_train.shape=}")
print(f"{y_train.shape=}")
print(f"{X_val.shape=}")
print(f"{y_val.shape=}")
print(f"{X_test.shape=}")
print(f"{y_test.shape=}")

In [None]:
# Apply the saved scaler to every split. Validation and test are each turned
# into a single 3-D sample by adding a leading batch axis of size 1.
X_train_scaled = scaler.transform(X_train)

XX_val = scaler.transform(X_val)[np.newaxis, ...]
# Response gets a leading batch axis and a trailing axis of size 1.
yy_val = y_val.reshape(1, -1, 1)

XX_test = scaler.transform(X_test)[np.newaxis, ...]

In [None]:
# Build training samples from the scaled features and the sentinel-filled response.
# NOTE(review): presumably these are the 48-hour windows described in the intro —
# confirm against build_training_batches_univariate.
# `masks` is not used by any of the cells shown in this notebook.
X_train_samples, y_train_samples, masks = build_training_batches_univariate(X = X_train_scaled, y=y_train)

## Train From Random Initialization

In [None]:
# Fit on the training samples; the validation period is passed as one
# sequence (batch of size 1) and is scored with the same masked loss.
rnn.fit(X_train_samples, y_train_samples, batch_size=64, epochs=100, verbose_fit=True, plot_history=True,
       validation_data=(XX_val, yy_val))

## Test Set

In [None]:
# Predict over the test period, passed as a single sequence (batch of size 1).
preds = rnn.predict(XX_test)

In [None]:
# Score with the masked loss so sentinel-filled (-9999) hours are excluded.
# NOTE(review): assumes preds holds exactly one value per test time step, so
# that the flattened arrays line up — confirm the output shape of rnn.predict.
test_mse = mse_masked(y_test.flatten(), preds.flatten())

In [None]:
# RMSE is the square root of the masked MSE computed above.
print(f"Test Set RMSE: {np.sqrt(test_mse)}")