In [None]:
import sys
sys.path.append('../src')
import numpy as np
from numpy import random
from scipy.stats import norm
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as K
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import yaml
from metrics import ros_3wind

from fmda_models import XGB
import reproducibility

# Custom Loss Functions for Fuel Moisture Models

*Author:* Jonathon Hirschi

## Fuel Moisture Background

[Fuel moisture content](https://www.ncei.noaa.gov/access/monitoring/dyk/deadfuelmoisture) is a measure of the water content of burnable materials.

## Fuel Moisture Nonlinear Effect on Rate of Spread

[Rate of spread](https://www.nwcg.gov/course/ffm/fire-behavior/83-rate-of-spread#:~:text=The%20rate%20of%20spread%20is,origin%20quickly%20with%20great%20intensity.) (ROS) is a measure of the speed at which a fire moves (often in units of m/s). The following image shows the nonlinear relationship between fuel moisture (FM) and ROS at a single spatial location, while holding the other variables associated with ROS constant. Wildfire spreads most readily in dry fuels, as seen in the peak of the ROS curve at zero FMC. The ROS drops off quickly as fuels get wetter, then levels off, and finally falls to zero when the FM reaches the "extinction value". Below is an idealized rate of spread curve for fuel category 8, "Closed Timber Litter" ([NIFC Category Descriptions](https://gacc.nifc.gov/rmcc/predictive/Fire%20Behavior%20Fuel%20Model%20Descriptions.pdf)). This fuel is selected since it is closest to an idealized 10hr fuel: its fuel load contribution from dead 10hr fuels is the highest of any fuel category, and there is no contribution from live fuels.

The ROS strongly depends on wind speed and slope. Below is the idealized ROS relationship with FMC with zero wind and zero slope.

<img src="../images/fuel8_ros_fm.png" alt="alt text" style="width: 500px;"/>

Next is the idealized ROS with a constant wind speed of 3m/s (x-direction only) and 0 slope. Note that the values on the y-axis are much higher than in the zero-wind case.

<img src="../images/fuel8_ros_fm_3wind.png" alt="alt text" style="width: 500px;"/>

The goal of this research project is to train machine learning models of fuel moisture that are the most accurate at forecasting the driest fuels. The motivation for this is to provide the most accurate forecasts of wildfire rate of spread. 

Next, we construct the idealized ROS curve for 3m/s wind speed. These values are from the output of `wrf-fire-matlab`, which uses simulation results from WRF-SFIRE.

In [None]:
# Idealized ROS curve at 3 m/s wind, evaluated on a fuel moisture grid.
# (An earlier draft hand-fitted a CubicSpline to points read off the plot;
# the curve now comes from `metrics.ros_3wind`.)
xvals = np.linspace(0, 35, 100)
ros_curve = ros_3wind(xvals)

plt.plot(xvals, ros_curve, color="red")
plt.title("ROS Curve")
plt.xlabel("Fuel Moisture (%)")
plt.ylabel("Rate of Spread (m/s)")
# Scientific notation on the y-axis: ROS values are small in m/s.
plt.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
plt.grid()

## Exponential Weighting of Residuals 

The standard loss function in machine learning when modeling a continuous output is the Residual Sum of Squares (RSS). Model parameters are then selected such that they minimize the RSS, where $y_i$ is the model output and $d_i$ is the observed value for the $i$th data point:

$$
RSS = \sum (y_i - d_i)^2
$$ 

In the standard RSS, each residual is given equal weight when calculating the total model loss. A simple change to the RSS is to add weights that increase or decrease the relative contribution of a particular residual to the overall loss of a model output. We will refer to this as the Weighted RSS (WRSS):

$$
WRSS = \sum w_i(y_i - d_i)^2
$$

The weights $w_i$ could in principle come from anywhere, but a common and useful technique is to construct weights based on the value of the true observed data $d_i$. The WRSS is commonly used in imbalanced classification tasks, where there is much less observed data for one class label than the other class labels. 

In the context of fuel moisture modeling, we will examine weights that are related to the observed value of fuel moisture using a negative exponential to give greater weight to drier fuels. The weights for the $i$th residual will be $e^{-\omega d_i}$, where the parameter $\omega$ represents the strength of the weighting scheme relative to an unweighted RSS. The WRSS would therefore have the form:

$$
\sum e^{-\omega d_i}(y_i - d_i)^2
$$

For $\omega = 0$, $e^{-\omega d_i} = 1$, and we recover the unweighted RSS. As we increase $\omega$, greater weight is placed on the lower values of fuel moisture. The exponential weights are always positive, even for very high values of fuel moisture. However, given an estimated moisture of extinction of 30% for 10h fuels, values of $\omega$ greater than $0.2$ place close to zero weight on residuals associated with fuel moisture observations of 30% or greater. Below we plot the weighting scheme for various values of $\omega$:

In [None]:
# Two-panel figure: the ROS curve (left) and candidate exponential weighting
# schemes e^{-omega * d} overlaid on the max-scaled ROS curve (right).
fms = np.linspace(0, 35, 100)
ros_vals = ros_3wind(fms)

fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: ROS in physical units (the axis label identifies the curve).
ax[0].plot(fms, ros_vals, 'r-')
ax[0].set_xlabel('Fuel Moisture (%)')
ax[0].set_ylabel('Rate of Spread (m/s)', color='red')
ax[0].tick_params('y', colors='r')
ax[0].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax[0].grid(True)

# Right panel: omega = 0 reference (every residual weighted equally).
ax[1].plot(fms, np.ones(len(fms)), 'blue', label='Equal Weight (unweighted)')

# One distinct line style per omega so each curve can be matched to its
# legend entry (previously three curves shared the same dashed style).
omega_styles = [
    (0.01, '--'),
    (0.025, '-.'),
    (0.05, ':'),
    (0.1, (0, (5, 1))),
    (0.5, (0, (1, 1))),
]
for omega, linestyle in omega_styles:
    weights = np.exp(-omega * fms)
    ax[1].plot(
        fms, weights, color='blue', linestyle=linestyle,
        # d denotes the observed fuel moisture, as in the text.
        label=rf'$e^{{-{omega}\,d}}$ Weight',
    )

# ROS divided by its maximum so it shares the [0, 1] range of the weights.
ax[1].plot(fms, ros_vals / ros_vals.max(), 'r', label='Rate of Spread (Scaled)')

ax[1].set_xlabel('Fuel Moisture (%)')
ax[1].set_ylabel('Weight', color='blue')
ax[1].tick_params('y', colors='b')
ax[1].grid(True)

fig.legend(loc="upper left", bbox_to_anchor=(1, .8))
plt.tight_layout()
plt.savefig('../outputs/weights.png', bbox_inches='tight')
plt.show()

## Test Example

In [None]:
# Load all observations, then keep one station over a two-week window.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only load
# files from a trusted source.
df_all = pd.read_pickle("../data/rocky_2023_05-09.pkl")
in_window = (
    (df_all['stid'] == "TT696")
    & (df_all['date'] >= '2023-06-01')
    & (df_all['date'] <= '2023-06-14')
)
df = df_all[in_window]

# Label both series: they share one axis and are otherwise indistinguishable.
# NOTE(review): the title names station CPTC2 while the filter uses stid
# "TT696" - confirm both refer to the same station.
plt.plot(df.date, df.fm, label="FM Observed")
plt.plot(df.date, df.rain, label="Rain")
plt.title("FM Observations at CPTC2 from 2023-06-01 through 2023-06-14")
plt.xticks(rotation=90)
plt.legend()
plt.grid()

Now we train a simple XGBoost model on the observations before 2023-06-13 and forecast the remainder of the period.

In [None]:
# Preview the first rows only; rendering the full frame bloats the notebook.
df.head()

In [None]:
# Chronological split: rows before the cutoff train the model, rows on or
# after it are held out for forecasting.
features = ["Ed", "Ew", "hour", 'rain']  # columns used as model inputs
is_train = df.date < '2023-06-13'
is_test = df.date >= '2023-06-13'

X_train = df.loc[is_train, features]
y_train = df.loc[is_train, "fm"]
train_dates = df.loc[is_train, "date"]

X_test = df.loc[is_test, features]
y_test = df.loc[is_test, "fm"]
test_dates = df.loc[is_test, "date"]

print(f"Training Observations: {y_train.shape[0]}")
print(f"Test Observations: {y_test.shape[0]}")

In [None]:
# Read the hyperparameters for every model from the shared YAML file.
with open('../models/params.yaml', mode='r') as file:
    all_params = yaml.safe_load(file)

# Keep only the XGBoost entry and display it as the cell output.
params = all_params["xgb"]
params

In [None]:
# Baseline model: squared-error objective with no sample weights.
reproducibility.set_seed(123)
model = XGB(params=params, loss='reg:squarederror')
model.fit(X_train, y_train)

# In-sample fit and out-of-sample forecasts.
fitted = model.predict(X_train)
preds = model.predict(X_test)

In [None]:
# Observed FM overlaid with the baseline model's fitted values and forecasts.
baseline_series = [
    (df.date, df.fm, "FM Observed"),
    (train_dates, fitted, "Fitted"),
    (test_dates, preds, "Forecasts"),
]
for series_dates, series_values, series_label in baseline_series:
    plt.plot(series_dates, series_values, label=series_label)

plt.title("FM Observations at CPTC2 from 2023-06-01 through 2023-06-14")
plt.legend()
plt.xticks(rotation=90)
plt.grid()

In [None]:
# Error summary for the baseline model on the held-out period.
# Bias is computed as predicted minus observed.
test_rmse = np.sqrt(mean_squared_error(y_test, preds))
test_bias = np.mean(preds - y_test)
print(f"Test RMSE: {test_rmse}")
print(f"Test Mean Bias: {test_bias}")

The RMSE shows middling model accuracy, but this metric treats negative and positive errors equally. If we examine the average bias of the model, the model is systematically overpredicting FMC in the prediction phase. Here, bias is defined simply as predicted minus observed, so a positive bias indicates overprediction.

In [None]:
# Exponential sample weights e^{-0.1 * d}: drier observations weigh more.
scaled_fm = tf.multiply(-0.1, y_train)
weights = tf.exp(scaled_fm)

In [None]:
# Refit with the exponential sample weights. The objective is passed
# explicitly, as in the unweighted baseline, so that the two models differ
# only in the weights rather than in whatever XGB's default objective is.
reproducibility.set_seed(123)
model = XGB(loss='reg:squarederror', params=params)
model.fit(X_train, y_train, weights)

# In-sample fit and out-of-sample forecasts of the weighted model.
fitted2 = model.predict(X_train)
preds2 = model.predict(X_test)

In [None]:
# Error summary for the weighted model on the held-out period.
# Bias is computed as predicted minus observed.
test_rmse2 = np.sqrt(mean_squared_error(y_test, preds2))
test_bias2 = np.mean(preds2 - y_test)
print(f"Test RMSE: {test_rmse2}")
print(f"Test Mean Bias: {test_bias2}")

In [None]:
# Observed FM overlaid with the weighted model's fitted values and forecasts.
weighted_series = [
    (df.date, df.fm, "FM Observed"),
    (train_dates, fitted2, "Fitted"),
    (test_dates, preds2, "Forecasts"),
]
for series_dates, series_values, series_label in weighted_series:
    plt.plot(series_dates, series_values, label=series_label)

plt.title("FM Observations at CPTC2 from 2023-06-01 through 2023-06-14")
plt.legend()
plt.xticks(rotation=90)
plt.grid()

## Predicting ROS

In [None]:
# ROS implied by the observed fuel moisture. `ros_3wind` is the ROS function
# imported at the top of the notebook; the bare name `ros` is never defined.
plt.plot(df.date, ros_3wind(df.fm))
plt.title("ROS from Observed FM")
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
plt.xticks(rotation=90)
plt.ylabel("Rate of Spread (m/s)")
plt.grid()

In [None]:
# Convert FM fits/forecasts to ROS with the imported `ros_3wind`
# (the bare name `ros` is never defined in this notebook).

# Unweighted baseline model
ros_tr = ros_3wind(fitted)
ros_te = ros_3wind(preds)

# Exponentially weighted model
ros2_tr = ros_3wind(fitted2)
ros2_te = ros_3wind(preds2)

In [None]:
# Compare both models on ROS. The observed ROS is computed once with the
# imported `ros_3wind` (the bare name `ros` is never defined in this notebook).
# Bias is computed as predicted minus observed.
ros_obs = ros_3wind(y_test)

print("RMSE on ROS, unweighted: {:.2e}".format(np.sqrt(mean_squared_error(ros_obs, ros_te))))
print("RMSE on ROS, weighted: {:.2e}".format(np.sqrt(mean_squared_error(ros_obs, ros2_te))))
print("~"*25)
print("Bias on ROS, unweighted: {:.2e}".format(np.mean(ros_te - ros_obs)))
print("Bias on ROS, weighted: {:.2e}".format(np.mean(ros2_te - ros_obs)))

### ROS Forecasts based on Weights

In [None]:
# Sweep the exponential weight parameter omega and record, for each value,
# the test RMSE of the ROS implied by the model's FM forecasts.
reproducibility.set_seed(123)

# Grid of exponential weight parameter omega
w = np.linspace(0, .3, 10)

# NOTE: despite its name, `rss` holds the RMSE on ROS for each omega.
rss = np.zeros_like(w)

# Observed ROS does not depend on omega, so compute it once outside the loop.
# `ros_3wind` is the imported ROS function; the bare name `ros` is undefined.
ros_obs = ros_3wind(y_test)

for i, wi in enumerate(w):
    weights = tf.exp(tf.cast(tf.multiply(-wi, y_train), tf.float64))
    # Same explicit objective as the unweighted baseline model.
    model = XGB(loss='reg:squarederror', params=params)
    model.fit(X_train, y_train, weights)
    # Loop-local name: do not overwrite the baseline model's `preds`,
    # which earlier cells rely on if they are re-run.
    ros_preds_i = ros_3wind(model.predict(X_test))
    rss[i] = np.sqrt(mean_squared_error(ros_obs, ros_preds_i))

In [None]:
# Test error on ROS as a function of the weighting strength omega.
plt.plot(w, rss)
plt.title("XGBoost accuracy on ROS")
# Dashed vertical line marks the omega with the lowest test error.
plt.axvline(w[rss.argmin()], color='k', linestyle='dashed')
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
# Raw string: "\o" is an invalid escape sequence in a normal string literal.
plt.xlabel(r"$\omega$")
# The plotted quantity is sqrt(mean squared error), i.e. RMSE rather than RSS.
plt.ylabel("RMSE on Forecasted ROS")
plt.grid()

## References

* Open Wildland Fire Modeling e-Community. https://wiki.openwfm.org/wiki/
* National Wildfire Coordinating Group (NWCG). https://www.nwcg.gov/course/ffm/
* *Dead Fuel Moisture*, NOAA National Centers for Environmental Information. https://www.ncei.noaa.gov/access/monitoring/dyk/deadfuelmoisture
* *Custom Loss Functions in Environmental Science*:

Ebert‐Uphoff, Imme, Ryan Lagerquist, Kyle Hilburn, Yoonjin Lee, Katherine Haynes, Jason Stock, Christina Kumler and Jebb Stewart. “CIRA Guide to Custom Loss Functions for Neural Networks in Environmental Sciences - Version 1.” ArXiv abs/2106.09757 (2021): n. pag. 