# How to run a model using `fowt_ml` and calculate scores

## Data preparation (common for any type of model)

In [3]:
from fowt_ml.datasets import get_data, fix_column_names
from fowt_ml import Config
from sklearn.model_selection import train_test_split

In [4]:
# Location of the example YAML configuration. This is a relative path, so it
# is resolved against the kernel's working directory when it is opened.
example_config_file = "../../src/example_config.yml"

In [5]:
import os

# Build the configuration object from the example YAML file.
config = Config.from_yaml(example_config_file)

# Override where the "exp699" data file is read from. Set the environment
# variable FOWT_ML_EXP699_PATH to point at your own copy of exp699.mat instead
# of editing the notebook; when it is unset, the path originally hard-coded in
# this cell is used, so existing setups keep working unchanged.
exp699_path = os.environ.get(
    "FOWT_ML_EXP699_PATH",
    "/home/sarah/temp/hybridlabs/data_example/exp699.mat",
)
config["data"]["exp699"]["path_file"] = exp699_path

# Load the "exp699" entry described by the data section of the config.
df = get_data("exp699", config["data"])

In [6]:
# Column labels of the model inputs and outputs, as listed in the config.
predictors_labels = config["ml_setup"]["predictors"]
target_labels = config["ml_setup"]["targets"]

# Pick the predictor/target columns and rename the column names to exclude [].
X_data = fix_column_names(df.loc[:, predictors_labels])
Y_data = fix_column_names(df.loc[:, target_labels])

# train/test split, driven by the keyword arguments stored in the config
train_test_split_kwargs = config["ml_setup"]["train_test_split_kwargs"]
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    **train_test_split_kwargs,
)

In [7]:
# Metric names to evaluate, read from the config; every scoring call below
# receives this list. The bare name on the last line echoes it as cell output.
metrics = config["ml_setup"]["metric_names"]
metrics

['neg_mean_absolute_error',
 'neg_root_mean_squared_error',
 'r2',
 'model_fit_time',
 'model_predict_time']

## Linear models 

In [6]:
from fowt_ml import LinearModels
# Show the mapping from accepted estimator names to the classes behind them;
# one of these names is passed to LinearModels(...) in the next cell.
LinearModels.ESTIMATOR_NAMES

{'LinearRegression': sklearn.linear_model._base.LinearRegression,
 'RidgeRegression': sklearn.linear_model._ridge.Ridge,
 'LassoRegression': sklearn.linear_model._coordinate_descent.Lasso,
 'ElasticNetRegression': sklearn.linear_model._coordinate_descent.ElasticNet,
 'LeastAngleRegression': sklearn.linear_model._least_angle.Lars}

In [7]:
# calculate metrics
# Pick one of the names listed in LinearModels.ESTIMATOR_NAMES.
model_name = "LeastAngleRegression"
model = LinearModels(model_name)

# NOTE(review): presumably enables scaling of the data inside the model before
# fitting -- confirm against the fowt_ml documentation.
model.use_scaled_data()
# Returns a dict keyed by metric name (see the output below); it includes
# 'model_fit_time', so the fit happens inside this call.
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

{'neg_mean_absolute_error': -1.603052611245188,
 'neg_root_mean_squared_error': -2.1309153094889486,
 'r2': 0.10732388900915783,
 'model_fit_time': np.float64(0.096),
 'model_predict_time': np.float64(0.001)}

In [8]:
# Cross-validate on the training split only. The result holds one array per
# metric (five values each in the saved output, presumably one per fold).
# Note that this overwrites the `scores` dict from the previous cell.
scores = model.cross_validate(X_train, y_train, metrics) 
scores

{'model_fit_time': array([0.109, 0.085, 0.081, 0.08 , 0.082]),
 'neg_mean_absolute_error': array([-1.60136019, -1.60960767, -1.6112611 , -1.61425787, -1.59912871]),
 'neg_root_mean_squared_error': array([-2.13342478, -2.13724123, -2.14536043, -2.15210295, -2.1258841 ]),
 'r2': array([0.10649117, 0.10830421, 0.10762699, 0.10839059, 0.10954135]),
 'model_predict_time': array([0.001, 0.001, 0.001, 0.001, 0.001])}

## Random Forest

In [9]:
from fowt_ml import EnsembleModel
# Show the mapping from accepted estimator names to the classes behind them.
EnsembleModel.ESTIMATOR_NAMES

{'ExtraTrees': sklearn.ensemble._forest.ExtraTreesRegressor,
 'RandomForest': sklearn.ensemble._forest.RandomForestRegressor}

In [10]:
model_name = "RandomForest"
# Hyperparameters are given inline here instead of being read from the config.
# NOTE(review): presumably the extra keyword arguments are forwarded to the
# underlying RandomForestRegressor -- confirm in fowt_ml.
model = EnsembleModel(estimator=model_name, max_depth=9, bootstrap=True, max_samples=10_000, n_estimators=50)

model.use_scaled_data()
# Returns a dict keyed by metric name (see the output below).
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

{'neg_mean_absolute_error': -1.5959389437050893,
 'neg_root_mean_squared_error': -2.1135488771633013,
 'r2': 0.12112219032333066,
 'model_fit_time': np.float64(5.78),
 'model_predict_time': np.float64(0.003)}

## Gaussian Process

In [12]:
from fowt_ml import SparseGaussianModel
# Show the mapping from accepted estimator names to the classes behind them.
SparseGaussianModel.ESTIMATOR_NAMES

{'SklearnGPRegressor': fowt_ml.gaussian_process.SklearnGPRegressor}

In [13]:
model_name = "SklearnGPRegressor"
# Hyperparameters for this estimator are looked up in the config by its name.
params = config["ml_setup"]["model_names"][model_name]

# Pass model_name instead of repeating the string literal, so the estimator
# being built always matches the one whose parameters were just looked up
# (this is also how the MLP and XGBoost cells are written).
model = SparseGaussianModel(model_name, **params)
model.use_scaled_data()
# Returns a dict keyed by metric name (see the output below).
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_absolute_error': -1.5981088588623225,
 'neg_root_mean_squared_error': -2.1218047367573445,
 'r2': 0.12320574147031532,
 'model_fit_time': np.float64(88.066),
 'model_predict_time': np.float64(0.021)}

## MLP

In [10]:
from fowt_ml import NeuralNetwork
# Show the mapping from accepted estimator names to the objects behind them.
NeuralNetwork.ESTIMATOR_NAMES

{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor,
 'RNNRegressor': fowt_ml.neural_network.SklearnRNNRegressor,
 'LSTMRegressor': fowt_ml.neural_network.SklearnLSTMRegressor,
 'GRURegressor': fowt_ml.neural_network.SklearnGRURegressor}

In [11]:
model_name = "MultilayerPerceptron"
# Hyperparameters for this estimator are looked up in the config by its name.
params = config["ml_setup"]["model_names"][model_name]

model = NeuralNetwork(model_name, **params)

In [12]:
# Display the estimator object held by the model, to inspect its settings.
model.estimator

In [None]:
# NOTE(review): presumably enables scaling of the data inside the model before
# fitting -- confirm against the fowt_ml documentation.
model.use_scaled_data()
# Returns a dict keyed by metric name (see the output below).
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

{'neg_mean_absolute_error': -1.5981261086400942,
 'neg_root_mean_squared_error': -2.1186915415839853,
 'r2': 0.12120288013830806,
 'model_fit_time': np.float64(11.438),
 'model_predict_time': np.float64(0.001)}

## XGBoost

In [16]:
from fowt_ml import XGBoost
# Show the mapping from accepted estimator names to the classes behind them.
XGBoost.ESTIMATOR_NAMES

{'XGBoostRegression': xgboost.sklearn.XGBRegressor}

In [17]:
model_name = "XGBoostRegression"
# Hyperparameters for this estimator are looked up in the config by its name.
params = config["ml_setup"]["model_names"][model_name]
model = XGBoost(model_name, **params)

model.use_scaled_data()
# Returns a dict keyed by metric name (see the output below).
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

{'neg_mean_absolute_error': -1.581722378730774,
 'neg_root_mean_squared_error': -2.0865964889526367,
 'r2': 0.15320807695388794,
 'model_fit_time': np.float64(2.811),
 'model_predict_time': np.float64(0.001)}

## RNN models

In [8]:
from fowt_ml import NeuralNetwork
import numpy as np
# Show the mapping from accepted estimator names to the objects behind them.
# NOTE(review): the saved output of this cell lists the recurrent estimators as
# functions, while the same expression in the MLP section lists Sklearn*
# classes; the two outputs look like they were saved with different fowt_ml
# versions -- re-run the notebook top to bottom to refresh them.
NeuralNetwork.ESTIMATOR_NAMES

{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor,
 'RNNRegressor': <function fowt_ml.neural_network.RNNRegressor(**args)>,
 'LSTMRegressor': <function fowt_ml.neural_network.LSTMRegressor(**args)>,
 'GRURegressor': <function fowt_ml.neural_network.GRURegressor(**args)>}

In [9]:
# for torch based models, this is needed:
# turn each of the four splits into a float32 NumPy array, keeping its name.
X_train, X_test, y_train, y_test = (
    np.asarray(split, dtype=np.float32)
    for split in (X_train, X_test, y_train, y_test)
)

In [11]:
model_name = "RNNRegressor"
# The network dimensions follow the data: one input per predictor label and
# one output per target label.
params = dict(
    input_size=len(predictors_labels),
    hidden_size=64,
    output_size=len(target_labels),
    num_layers=2,
)

In [12]:
model = NeuralNetwork(model_name, **params)
# This call is the last expression of the cell, so its return value
# (a NeuralNetwork object, see the output below) is echoed.
model.use_scaled_data()

<fowt_ml.neural_network.NeuralNetwork at 0x772ec59b3a10>

In [13]:
# Fit the estimator held by the model directly on the training split.
# NOTE(review): calculate_score in the next cell reports its own
# 'model_fit_time', so it appears to fit the model itself; this direct fit
# looks redundant (and slow for the recurrent models) -- confirm and drop it
# if it is left over from debugging.
model.estimator.fit(X_train, y_train)

In [None]:
# Returns a dict keyed by metric name; the per-epoch table in the output below
# is the training log printed while this call runs.
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

  epoch    train_loss    valid_loss      dur
-------  ------------  ------------  -------
      1        0.9143        0.8909  24.6896
      2        0.8943        0.8866  26.2002
      3        0.8915        0.8850  33.9241
      4        0.8902        0.8840  36.8488
      5        0.8893        0.8833  38.8893
      6        0.8886        0.8827  37.3335
      7        0.8881        0.8823  40.8689
      8        0.8876        0.8819  36.2290
      9        0.8872        0.8815  36.1466
     10        0.8869        0.8812  40.8171


{'neg_mean_absolute_error': -1.6005001068115234,
 'neg_root_mean_squared_error': -2.124774694442749,
 'r2': 0.11344795674085617,
 'model_fit_time': np.float64(352.913),
 'model_predict_time': np.float64(0.001)}

In [10]:
model_name = "LSTMRegressor"
# The network dimensions follow the data: one input per predictor label and
# one output per target label. Training is limited to 5 epochs here.
params = dict(
    input_size=len(predictors_labels),
    hidden_size=64,
    output_size=len(target_labels),
    num_layers=2,
    max_epochs=5,
)

In [11]:
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
# Returns a dict keyed by metric name; the per-epoch table in the output below
# is the training log printed while this call runs.
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        1.0000        0.9892  6.0975
      2        0.9963        0.9848  6.2456
      3        0.9905        0.9776  6.3866
      4        0.9819        0.9680  6.6953
      5        0.9717        0.9581  7.1940


{'neg_mean_absolute_error': -1.6250715255737305,
 'neg_root_mean_squared_error': -2.1550610065460205,
 'r2': 0.03436806797981262,
 'model_fit_time': np.float64(33.554),
 'model_predict_time': np.float64(0.001)}

In [19]:
model_name = "GRURegressor"
# The network dimensions follow the data: one input per predictor label and
# one output per target label. Training is limited to 5 epochs here.
params = dict(
    input_size=len(predictors_labels),
    hidden_size=64,
    output_size=len(target_labels),
    num_layers=2,
    max_epochs=5,
)

In [20]:
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
# Returns a dict keyed by metric name; the per-epoch table in the output below
# is the training log printed while this call runs.
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics) 
scores

  epoch    train_loss    valid_loss      dur
-------  ------------  ------------  -------
      1        0.9858        0.9623  81.2824
      2        0.9600        0.9410  107.4753
      3        0.9390        0.9225  109.0925
      4        0.9237        0.9114  115.3645
      5        0.9143        0.9039  117.8505


{'neg_mean_absolute_error': -1.6065346002578735,
 'neg_root_mean_squared_error': -2.1341967582702637,
 'r2': 0.08973821997642517,
 'model_fit_time': np.float64(531.139),
 'model_predict_time': np.float64(0.001)}