## FSM training

In [1]:
# Libraries
import pandas as pd
import numpy as np
import os, warnings, tqdm
import time as timet
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition

# ML Models
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.ensemble import RandomForestRegressor as RF
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.neural_network import MLPRegressor as MLP
from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split

import shap

# Scikit-Learn ML models to C++ (Uncomment to export)
# import m2cgen 

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [23]:
# Main functions
'''
# ML Models
- svr: Support vector regressor
- knn: K-nearest neighbor regressor
- dt: Decision tree regressor
- rf: Random forest regressor
- nn: Neural network
- xgboost; XGBoost regressor
'''
def model_fit(path=None, model_name=None, train_features=None, train_labels=None, rand_idx=0, fsm=None, export=False, inference=False, drop_var=None, n_cores=None, ros_mode=None):
    
    # Select ML model 
    if model_name == "svr":
        if ros_mode==None:
            model = MultiOutputRegressor(SVR(C=1.0))
        else: 
            model = SVR(C=1.0)
    elif model_name == "gbr":
        if ros_mode==None:
            model = MultiOutputRegressor(GBR())
        else: 
            model = GBR()    
    elif model_name =="knn":
        model = KNN()    
    elif model_name =="dt":
        model = DT(max_depth=1000)
    elif model_name == "rf":
        model = RF(n_estimators=1000)
    elif model_name == "nn":
        model = MLP()
    elif model_name =="xgboost":
        model = XGBRegressor(max_depth=1000, n_jobs=n_cores)

    # Fit model to predict and compute loss
    model.fit(train_features,train_labels)
    pred = model.predict(train_features)
    mse_loss = mse(pred, train_labels)
    # print("MSE Loss : ", mse_loss)
    mape_loss = mape(pred, train_labels)
    # print("MAPE Loss : ", mape_loss)

    # Model inference
    if inference:
        x_df = pd.read_csv(path) # Change path to input training dataset CSV
        if 'num' in x_df.columns:
            x_df = x_df.drop(columns = ['num'] + drop_var) 

        if fsm == 1: 
            label_encoder = LabelEncoder()
            encoded_feature = label_encoder.fit_transform(x_df['nftype'])
            x_df['nftype'] = encoded_feature                        
        elif fsm == 2: 
            label_encoder = LabelEncoder()
            encoded_feature = label_encoder.fit_transform(x_df['nftype'])
            x_df['nftype'] = encoded_feature    
        elif fsm == 3:
            x_df = x_df[['fueltype', 'ffmc', 'ws', 'bui', 'ps', 'saz', 'HROS', 'BROS', 'FROS']]
            label_encoder = LabelEncoder()
            encoded_feature = label_encoder.fit_transform(x_df['fueltype'])
            x_df['fueltype'] = encoded_feature

        ros_df = x_df.drop(['HROS','BROS','FROS'], axis=1)

        # Predict sample with trained FSM
        rand_idx = rand_idx # Fuel type 101, no wind, no slope, very dry
        ros_df_scaled = pd.DataFrame(ros_df, None, ros_df.keys())
        sample = ros_df_scaled.loc[[rand_idx]].values[0].tolist()
        actual = x_df.loc[[rand_idx]]
        # print("INDEX :", rand_idx); print("SAMPLE : ", sample); print("ACTUAL : ", actual[['HROS','BROS','FROS']])

        start_time = timet.time()
        yhat = model.predict(np.array([sample])); 
        final_time = timet.time() - start_time
        # print("--- %s seconds ---" % (final_time))
        # print("Prediction of Fuel Model " + str(rand_idx) + " :", yhat)
    else:
        final_time = None # No inference time recorded

    # Export model to C++ (To port to Cell2Fire)
    if export:
        # Export model to C++
        code = m2cgen.export_to_c(model, function_name=ros_mode)
        text_file = open(model_name + '_kitral_' + ros_mode + '.h', "wt", ) 
        n = text_file.write(code)
        text_file.close()

    return mape_loss, mse_loss, model, final_time    
    
def train_data(path, ros_mode, fsm, drop_var=[]):

    # Load input features and ROS outputs
    df = pd.read_csv(path)

    # BehavePlus
    if fsm == 1: 
        label_encoder = LabelEncoder()
        encoded_feature = label_encoder.fit_transform(df['nftype'])
        df['nftype'] = encoded_feature        

    # KITRAL                
    elif fsm == 2: 
        label_encoder = LabelEncoder()
        encoded_feature = label_encoder.fit_transform(df['nftype'])
        df['nftype'] = encoded_feature            
    
    # FBP
    elif fsm == 3:
        df = df[['fueltype', 'ffmc', 'ws', 'bui', 'ps', 'saz', 'HROS', 'BROS', 'FROS']]
        label_encoder = LabelEncoder()
        encoded_feature = label_encoder.fit_transform(df['fueltype'])
        df['fueltype'] = encoded_feature

    # Select ROS mode (3-ROS vs 1-ROS output)
    ros_num = -3 if not ros_mode else -1
    # print("3-ROS output" if not ros_mode else "Single ROS output with mode: ", ros_mode)

    if 'num' in df.columns:
        df = df.drop(columns = ['num'] + drop_var) 
    if ros_mode == "HROS" or ros_mode == "hros":
        df = df.drop(columns = ['BROS', 'FROS'])
    elif ros_mode == "BROS" or ros_mode == "bros":
        df = df.drop(columns = ['HROS', 'FROS'])
    elif ros_mode == "FROS" or ros_mode == "fros":
        df = df.drop(columns = ['BROS', 'HROS'])

    df = df.dropna().reset_index(drop=True)
    
    n_inputs = len(df.keys()[:ros_num]) # 1 ROS outputs
    X_train = df.iloc[:, :ros_num].values
    y_train = df.iloc[:,ros_num:]
    
    # Prepare data for ML
    train_features = np.array(X_train)
    train_labels = np.array(y_train).reshape((-1, ros_num*-1)) # 3ROS
    
    return train_features, train_labels

def main_run(path, model_list, ros_modes=None, fsm=None, inference=False, export=False, n_cores=n_cores, drop_var=[]):

    # Predict loop
    all_results = {'model':[], 'ros_mode':[], 'mape':[], 'mse':[], 'time':[]}

    for _model in tqdm.tqdm(model_list, total=len(model_list), desc='Training ML models...'):
        # print('MODEL: ', _model)

        for ros_mode in ros_modes:
            # print('ROS MODE: ', ros_mode)

            ### Prepare training data
            training_features, training_targets = train_data(path, ros_mode, fsm, drop_var)

            # Set feature names based on FSM (For SHAP analysis)
            if fsm == 1:
                fnames = ["Fuel Type","Wind Speed","Wind Direction","Slope","1H Moisture Content","10H Moisture Content",
                        "100H Moisture Content","Herbaceous Moisture Content","Woody Moisture Content",
                        "1H Load","10H Load","100H Load","Herbaceous Load","Woody Load",
                        "1H SAV","Herbaceous SAV","Woody SAV","Fuel Bed Depth","Dead Moisture of Extinction"]

            elif fsm == 2:
                fnames = ["Fuel Type", "Speed", "Fuel Load", "Heat", "Moisture Content", "Wind Speed", 
                        "Slope", "Moisture Content\nFactor", "Slope Factor", "Wind Factor"]

            elif fsm == 3:
                fnames = ["Fuel Type", "FFMC", "Wind Speed", "BUI", "Slope","Aspect"]

            elif fsm == 4: # Truncated BP6
                fnames = ["Fuel Type","Wind Speed","Wind Direction","Slope","1H Moisture Content",
                        "1H Load","10H Load","100H Load","Herbaceous Load","Woody Load",
                        "1H SAV","Herbaceous SAV","Woody SAV","Fuel Bed Depth","Dead Moisture of Extinction"]    

            X_train, X_test, y_train, y_test = train_test_split(training_features, training_targets, test_size=0.25, random_state=42) 

            ### Train model
            model_name = _model
            rand_idx=0
            mape_loss, mse_loss, model, time = model_fit(path=path, model_name=model_name, train_features=X_train, train_labels=y_train, n_cores=n_cores,
                                        rand_idx=rand_idx, fsm=fsm, inference=inference, export=export, drop_var=drop_var)

            all_results['model'].append(_model)
            all_results['ros_mode'].append(ros_mode)
            all_results['mape'].append(mape_loss)
            all_results['mse'].append(mse_loss)
            all_results['time'].append(time)    

    return all_results

In [3]:
# Set paths
base_path = '/Users/minho/Desktop/Cell2FireML/Github/data'

## Single-output ROS predictions

In [5]:
# ML model's parameters
# ros_modes = ['HROS']                                  # Single ROS modes include: "HROS", "BROS", "FROS" (Must be in list format)            
drop_var=[]                                             # Drop variables in training data csv file (optional)
inference=True                                          # Inference option
export=False                                            # Export option to port trained ML model for C++
n_cores = 4                                             # Number of cores for xgboost
model_list = ['gbr', 'nn', 'knn', 'rf', 'xgboost']      # List of ML models to train

#### BehavePlus V6 (US)

In [6]:
### BehavePlus (US): FSM=1
fsm = 1 # 1: BehavePlus | 2: KITRAL | 3: FBP
path = os.path.join(base_path, 'bp6_training_data.csv')

# Train ML models for single ROS outputs
hros_results1 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['HROS'])
bros_results1 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['BROS'])
fros_results1 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['FROS'])

Training ML models...: 100%|██████████| 5/5 [12:12<00:00, 146.60s/it]
Training ML models...: 100%|██████████| 5/5 [08:22<00:00, 100.54s/it]
Training ML models...: 100%|██████████| 5/5 [07:58<00:00, 95.74s/it] 


In [7]:
# Print outputs
hros_results1 = pd.DataFrame(hros_results1)
bros_results1 = pd.DataFrame(bros_results1)
fros_results1 = pd.DataFrame(fros_results1)

hros_results1.head(1)

Unnamed: 0,model,ros_mode,mape,mse,time
0,gbr,HROS,1.14142,3834.934234,0.00053


#### KITRAL (Chile)

In [9]:
### KITRAL (Chile): FSM=2
fsm = 2 # 1: BehavePlus | 2: KITRAL | 3: FBP
path = os.path.join(base_path, 'kitral_training_data.csv')

# Train ML models for single ROS outputs
hros_results2 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['HROS'])
bros_results2 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['BROS'])
fros_results2 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['FROS'])

Training ML models...: 100%|██████████| 5/5 [01:16<00:00, 15.39s/it]
Training ML models...: 100%|██████████| 5/5 [00:23<00:00,  4.72s/it]
Training ML models...: 100%|██████████| 5/5 [00:28<00:00,  5.65s/it]


In [10]:
# Print outputs
hros_results2 = pd.DataFrame(hros_results2)
bros_results2 = pd.DataFrame(bros_results2)
fros_results2 = pd.DataFrame(fros_results2)

hros_results2.head(1)

Unnamed: 0,model,ros_mode,mape,mse,time
0,gbr,HROS,1.888335,66.455775,0.000674


#### FBP (Canada)

In [11]:
### FBP (Canada): FSM=3
fsm = 3 # 1: BehavePlus | 2: KITRAL | 3: FBP
path = os.path.join(base_path, 'fbp_training_data.csv')

# Train ML models for single ROS outputs
hros_results3 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['HROS'])
bros_results3 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['BROS'])
fros_results3 = main_run(path, model_list, fsm=fsm, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=['FROS'])

Training ML models...: 100%|██████████| 5/5 [05:48<00:00, 69.61s/it]
Training ML models...: 100%|██████████| 5/5 [06:09<00:00, 73.91s/it] 
Training ML models...: 100%|██████████| 5/5 [05:55<00:00, 71.05s/it] 


In [12]:
# Print outputs
hros_results3 = pd.DataFrame(hros_results3)
bros_results3 = pd.DataFrame(bros_results3)
fros_results3 = pd.DataFrame(fros_results3)

hros_results3.head(1)

Unnamed: 0,model,ros_mode,mape,mse,time
0,gbr,HROS,0.863488,62.842904,0.000499


#### Multi-output ROS predictions (3-ROS simultaneously)

In [24]:
# BehavePlus (US)
path = os.path.join(base_path, 'bp6_training_data.csv')
ros_results1 = main_run(path, model_list, fsm=1, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=[None])

# # KITRAL (Chile) 
# path = os.path.join(base_path, 'kitral_training_data.csv')
# ros_results2 = main_run(path, model_list, fsm=2, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=None)

# # FBP (Canada)
# path = os.path.join(base_path, 'fbp_training_data.csv')
# ros_results3 = main_run(path, model_list, fsm=3, inference=inference, export=export, drop_var=drop_var, n_cores=n_cores, ros_modes=None)

Training ML models...: 100%|██████████| 5/5 [14:01<00:00, 168.35s/it]
