# Test Loss Functions on Multiple Models

In [None]:
import sys
sys.path.append('src')
import pandas as pd
import numpy as np
import xgboost as xg
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import yaml
# Local modules
from data_funcs import train_test_split_spacetime
from fmda_models import LM, XGB
from metrics import ros, rmse
import reproducibility

## Read and Split Data

In [None]:
# Load the pickled dataframe and discard every row that has a missing value.
df = pd.read_pickle("data/rocky_2023_05-09.pkl").dropna()

# Seed before splitting (presumably so the split is reproducible -- confirm
# against reproducibility.set_seed / train_test_split_spacetime).
reproducibility.set_seed(42)
X_train, X_test, y_train, y_test = train_test_split_spacetime(df)

## SUBSET COLUMNS
# Keep only the two predictor columns used by the models below.
feature_cols = ['Ed', 'hour']
X_train, X_test = X_train[feature_cols], X_test[feature_cols]

## Setup Models

In [None]:
# Parse the model hyperparameter file; safe_load only builds plain Python
# objects from the YAML document.
with open('models/params.yaml', 'r') as params_file:
    params = yaml.safe_load(params_file)

# Display the parsed parameters as the cell output.
params

In [None]:
# Registry of models to evaluate, keyed by a short name.
# XGB is configured from the 'xgb' section of params; LM takes no arguments.
models = dict(
    xgb=XGB(params['xgb']),
    lm=LM(),
)

In [None]:
# Baseline run: fit every registered model without observation weights and
# print its RMSE on the test split.
for mod, model in models.items():
    fitted = model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(rmse(preds, y_test))

In [None]:
def exp_weight(y_train, w=0.1):
    """
    Compute exponential-decay weights for a vector of observations.
    Math Definition: e^(-w*y), evaluated elementwise. Used for weighted loss func.
    Parameters:
    -----------
    y_train : array-like
        observed data vector. Intended for training observations, never for
        test observations used in forecasts
    w : float, default=0.1
        Decay rate multiplying y_train inside the exponent
    Returns:
    -----------
    Result of tf.exp applied to -w * y_train (one weight per element of y_train)
    """
    exponent = tf.multiply(-w, y_train)
    return tf.exp(exponent)

In [None]:
def create_exp_function(w):
    """
    Build a weighting function with the decay rate `w` fixed.
    Parameters:
    -----------
    w : float
        Decay rate captured by the returned function
    Returns:
    -----------
    Function of one argument, y_train, that evaluates e^(-w*y_train)
    using tf.multiply and tf.exp
    """
    def exp_function(y_train):
        # w comes from the enclosing call, so each returned function
        # carries its own decay rate.
        exponent = tf.multiply(-w, y_train)
        return tf.exp(exponent)
    return exp_function

In [None]:
# Quick check of the factory: build a weighting function with rate 0.05 and
# evaluate it on a small sample vector (result is shown as the cell output).
sample_y = np.array([1, 2, 3])
fun = create_exp_function(.05)
fun(y_train=sample_y)

In [None]:
def loss_setup(ws = None):
    """
    Build the dictionary of loss-function configurations to evaluate.
    Parameters:
    -----------
    ws : iterable of float, default=None
        Decay rates (omegas) for exponential weighting. One entry named
        "exp_{w}" is added per value. If None, no exponential entries are added
    Returns:
    -----------
    Dictionary mapping a loss name to a dictionary with the single key
    'w_func', whose value is a function that maps y_train to a weight
    vector, or None when the loss is unweighted
    Raises:
    -----------
    TypeError
        If any element of ws is not a float
    """
    # Entries that are always present: unweighted ('rss') and weighted by
    # the imported ros function ('ros').
    loss_configs = {
        'rss' : {
            'w_func' : None
        },
        'ros': {
            'w_func' : ros
        }
    }
    # Using input omega parameter list, add dictionary key for exponential weighting for each omega in list
    if ws is not None:
        for w in ws:
            # Explicit check instead of assert, which is removed under `python -O`.
            if not isinstance(w, float):
                raise TypeError(
                    f"each omega in ws must be a float, got {type(w).__name__}"
                )
            dname = f"exp_{w}" # create name of dictionary key
            loss_configs[dname] = {
                'w_func' : create_exp_function(w)
            }
    return loss_configs

In [None]:
# Three evenly spaced decay rates from 0.01 to 0.3 (inclusive) for the
# exponential weighting entries.
omegas = np.linspace(0.01, .3, 3)
loss_dict = loss_setup(ws=omegas)

# Display the resulting configuration as the cell output.
loss_dict

In [None]:
# For every loss configuration, compute the training weights (if any), refit
# each model with them, and report test RMSE on both the raw predictions and
# their ros-transformed values.
for l, loss_cfg in loss_dict.items():
    print("~"*50)
    print(f"Running models for loss func: {l}")
    w_func = loss_cfg['w_func']
    # A w_func of None means the models are fit without weights.
    weights = w_func(y_train) if w_func is not None else None
    for mod, model in models.items():
        print(f"Fitting {mod}")
        model.fit(X_train, y_train, weights)
        preds = model.predict(X_test)
        print(f"Test RMSE for {mod}: {rmse(preds, y_test)}")
        print(f"Test ROS RMSE for {mod}: {rmse(ros(preds), ros(y_test))}")