# Test Loss Functions on Multiple Models

In [None]:
import sys
sys.path.append('src')
import pandas as pd
import numpy as np
import xgboost as xg
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from utils import create_exp_function, loss_setup, initialize_models
import yaml
import pickle
import os.path as osp
from datetime import timedelta
import matplotlib.pyplot as plt
# Local modules
from data_funcs import train_test_split_spacetime
from fmda_models import LM, XGB, RF
from metrics import ros_0wind, ros_3wind, rmse
import reproducibility

## Read Data

In [None]:
# Load the pickled dataframe and discard every row containing a missing value.
df = pd.read_pickle("data/raws_df.pkl").dropna()
# Show the available columns as the cell output.
df.columns

In [None]:
# Display the dataframe as this cell's output for a quick visual check.
df

## Setup Models

In [None]:
# Read the model configuration from YAML; safe_load only builds plain Python
# objects from the file contents.
with open('models/params.yaml', 'r') as params_file:
    params = yaml.safe_load(params_file)

# Echo the loaded configuration as the cell output.
params

In [None]:
# Grid of 10 evenly spaced weight values on [0.01, 0.25], rounded to 4 decimals.
weight_grid = np.linspace(0.01, 0.25, num=10).round(decimals=4)
# Build the models and the loss-function dictionary from the parameters and
# the weight grid.
models, loss_dict = loss_setup(params=params, ws=weight_grid)

## Run Analysis

In [None]:
# Subset of dataframe columns used as model features.
cols = [
    "Ed",
    "rain",
    "wind",
    "solar",
    "hour",
    "doy",
    "lat",
    "lon",
    "elev",
]
# Fix the random seed before running the analysis.
reproducibility.set_seed(123)

For each loss function and each model, we collect two arrays of errors on the test set: one holding the RMSE of the predictions against the test fuel moisture observations, and another holding the RMSE after both the predictions and the same observations are transformed to ROS.

In [None]:
# Rolling-window evaluation.
#
# Starting at the earliest timestamp in `df`, take a 30-day window, split it
# into train/test sets, fit every model under every loss weighting, and record
# the test errors in `loss_dict`. The window start then advances by `tdelta`
# days until a full 30-day window no longer fits in the data.
t = df.index.min()
i = 0
tdelta = 2 # number of days to shift train/test period
window_len = timedelta(days=30)
# Latest admissible window start; `df` is not modified in the loop, so this
# bound is computed once instead of on every iteration.
t_stop = df.index.max() - window_len
while t <= t_stop:
    print("~"*50)
    print(f"Iteration: {i}")
    print(f"t: {t}")
    # Build train/test from 30 day period after t
    df_temp = df[(df.index >= t) & (df.index < (t + window_len))]
    X_train, X_test, y_train, y_test = train_test_split_spacetime(
        df_temp,
        test_days = 2,
        spatial_test_frac = 0.2,
        verbose = True
    )
    X_train = X_train[cols]
    X_test = X_test[cols]
    # Run models
    # Reinitialize models dictionary to prevent multiple fitting iterations
    models = initialize_models(params)
    for loss_name in loss_dict:
        print("~"*50)
        print(f"Running models for loss func: {loss_name}")
        # A loss entry without a weight function means an unweighted fit.
        w_func = loss_dict[loss_name]['w_func']
        weights = w_func(y_train) if w_func is not None else None

        for model_name in models:
            print(f"Fitting {model_name}")
            models[model_name].fit(X_train, y_train, weights)
            preds = models[model_name].predict(X_test)
            # Compute each metric once and reuse it for storage and logging.
            rmse_fm = rmse(preds, y_test)
            rmse_ros = rmse(ros_3wind(preds), ros_3wind(y_test))
            model_errs = loss_dict[loss_name]["errs"][model_name]
            model_errs["t"].append(t)
            model_errs["rmse_test"].append(rmse_fm)
            model_errs["rmse_test_ROS"].append(rmse_ros)
            print(f"Test RMSE for {model_name}: {rmse_fm}")
            print(f"Test ROS RMSE for {model_name}: {rmse_ros}")

    # Shift the window start by `tdelta` days for the next iteration.
    i += 1 # iteration counter
    t = t + timedelta(days=tdelta)

In [None]:
# Flatten the nested error records into one small dataframe per
# (loss function, model) pair: fuel moisture RMSE goes to `results_fm`,
# ROS RMSE goes to `results_ros`.
results_fm = []
results_ros = []
for loss_name in loss_dict:
    per_model = loss_dict[loss_name]["errs"]
    for model_name in per_model:
        record = per_model[model_name]
        # Each metric key is paired with the list that collects its frames.
        for metric_key, destination in (
            ('rmse_test', results_fm),
            ('rmse_test_ROS', results_ros),
        ):
            values = record[metric_key]
            n_obs = len(values)
            destination.append(
                pd.DataFrame(
                    {
                        'RMSE': values,
                        'Loss': [loss_name] * n_obs,
                        'Model': [model_name] * n_obs,
                        't': record['t'],
                    }
                )
            )

In [None]:
# Stack the per-(loss, model) frames row-wise into a single table; the
# original row labels of each piece are kept (no index reset).
results_fm = pd.concat(results_fm, axis=0, ignore_index=False)
# Show the combined fuel moisture results as the cell output.
results_fm

In [None]:
# Stack the per-(loss, model) frames row-wise into a single table; the
# original row labels of each piece are kept (no index reset).
results_ros = pd.concat(results_ros, axis=0, ignore_index=False)
# Show the combined ROS results as the cell output.
results_ros

In [None]:
# Persist both result tables as pickle files in the "outputs" directory,
# fuel moisture results first, then ROS results.
for out_name, frame in (
    ("results_fm.pkl", results_fm),
    ("results_ros.pkl", results_ros),
):
    with open(osp.join("outputs", out_name), 'wb') as handle:
        pickle.dump(frame, handle, protocol=pickle.HIGHEST_PROTOCOL)