# How to run a model using `fowt_ml` and calculate scores

## Data preparation (common to all model types)

In [1]:
import os

from sklearn.model_selection import train_test_split

from fowt_ml import Config
from fowt_ml.datasets import get_data, fix_column_names

In [2]:
# Example configuration file, located two directories up under `src/`; the
# relative path is resolved against the kernel's working directory.
example_config_file = "../../src/example_config.yml"

In [3]:
# Load the example configuration from the YAML file.
config = Config.from_yaml(example_config_file)

# Point the "exp699" entry at the experiment file. Its location is
# machine-specific, so it can be overridden through the FOWT_EXP699_PATH
# environment variable without editing the notebook; the previously hard-coded
# location is kept as the fallback so existing setups keep working.
config["data"]["exp699"]["path_file"] = os.environ.get(
    "FOWT_EXP699_PATH",
    "/home/sarah/temp/hybridlabs/data_example/exp699.mat",
)

# Load the "exp699" data using the "data" section of the configuration.
df = get_data("exp699", config["data"])

In [4]:
# Column labels of the model inputs and outputs, as listed in the config.
predictors_labels = config["ml_setup"]["predictors"]
target_labels = config["ml_setup"]["targets"]

# Select the predictor/target columns, then rename the column names to
# exclude [] via `fix_column_names`.
X_data = fix_column_names(df.loc[:, predictors_labels])
Y_data = fix_column_names(df.loc[:, target_labels])

# train/test split: shuffled, 25% held out for testing, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.25,
    shuffle=True,
    random_state=123,
)

## Linear models 

In [5]:
from fowt_ml import LinearModels
# Names accepted by `LinearModels`, mapped to the estimator classes they select.
LinearModels.ESTIMATOR_NAMES

{'LinearRegression': sklearn.linear_model._base.LinearRegression,
 'RidgeRegression': sklearn.linear_model._ridge.Ridge,
 'LassoRegression': sklearn.linear_model._coordinate_descent.Lasso,
 'ElasticNetRegression': sklearn.linear_model._coordinate_descent.ElasticNet,
 'LeastAngleRegression': sklearn.linear_model._least_angle.Lars}

In [6]:
# Pick one of the estimator names listed above and wrap it in `LinearModels`.
model_name = "LeastAngleRegression"
model = LinearModels(model_name)

# Metrics to report; the result is a dict keyed by these names.
metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]

# calculate metrics from both halves of the train/test split
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -7.142729846925139,
 'neg_root_mean_squared_error': -2.262318094629433,
 'r2': 0.4049439308312828,
 'model_fit_time': np.float64(0.094)}

In [7]:
# Cross-validate using the training data only; each metric comes back as an
# array with one entry per fold (five entries in the recorded output).
scores = model.cross_validate(X_train, y_train, metrics)
scores

{'model_fit_time': array([0.081, 0.064, 0.064, 0.129, 0.114]),
 'neg_mean_squared_error': array([-7.22231853, -7.17974437, -7.13758398, -7.19825648, -7.23040949]),
 'neg_root_mean_squared_error': array([-2.27593291, -2.26982539, -2.26135329, -2.27097107, -2.27748786]),
 'r2': array([0.40378214, 0.39918316, 0.40149844, 0.40307672, 0.40292768])}

## Random Forest

In [8]:
from fowt_ml import EnsembleModel
# Names accepted by `EnsembleModel`, mapped to the estimator classes they select.
EnsembleModel.ESTIMATOR_NAMES

{'ExtraTrees': sklearn.ensemble._forest.ExtraTreesRegressor,
 'RandomForest': sklearn.ensemble._forest.RandomForestRegressor}

In [9]:
# Build a random forest through `EnsembleModel`; the remaining keyword
# arguments configure the forest (depth limit, bootstrap sampling with a cap
# on samples per tree, and the number of trees).
model_name = "RandomForest"
model = EnsembleModel(
    estimator=model_name,
    max_depth=9,
    bootstrap=True,
    max_samples=10_000,
    n_estimators=50,
)

# Declare the metrics in this cell, as the other model sections do, so this
# section does not depend on a variable left over from the linear-model cell.
metrics = ["neg_mean_squared_error", "neg_root_mean_squared_error", "r2", "model_fit_time"]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -7.022166318991772,
 'neg_root_mean_squared_error': -2.228100515320055,
 'r2': 0.43153118420094483,
 'model_fit_time': np.float64(6.148)}

In [10]:
# Use the out-of-bag samples to estimate the generalization score; only the
# training data is passed, and the score is requested as negative RMSE.
score = model.oob_score(
    X_train,
    y_train,
    scoring="neg_root_mean_squared_error",
)
score



-2.23571881144511

## Gaussian Process

In [11]:
from fowt_ml import SparseGaussianModel
# Names accepted by `SparseGaussianModel`, mapped to the estimator classes they select.
SparseGaussianModel.ESTIMATOR_NAMES

{'SklearnGPRegressor': fowt_ml.gaussian_process.SklearnGPRegressor}

In [12]:
model_name = "SklearnGPRegressor"

# Keyword arguments passed to `SparseGaussianModel` along with the model name.
params = {
    # a larger value takes longer
    "num_inducing": 100,
    "num_latents": 3,
}
model = SparseGaussianModel(model_name, **params)

# Same metric names as used for the other models in this notebook.
metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -9.100683239392092,
 'neg_root_mean_squared_error': -2.50777337759925,
 'r2': 0.3374002291651839,
 'model_fit_time': np.float64(125.812)}

## MLP

In [13]:
from fowt_ml import NeuralNetwork
# Names accepted by `NeuralNetwork`, mapped to the estimator classes they select.
NeuralNetwork.ESTIMATOR_NAMES

{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor}

In [14]:
model_name = "MultilayerPerceptron"

# Keyword arguments passed to `NeuralNetwork` along with the model name.
# NOTE(review): no seed is passed here, so scores may differ between runs;
# confirm whether `NeuralNetwork` forwards a `random_state` argument.
params = {
    # a larger value takes longer
    "hidden_layer_sizes": 10,
    "max_iter": 50,
}
model = NeuralNetwork(model_name, **params)

# Same metric names as used for the other models in this notebook.
metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores



{'neg_mean_squared_error': -7.575365060051074,
 'neg_root_mean_squared_error': -2.3274787520012556,
 'r2': 0.37409258637505166,
 'model_fit_time': np.float64(29.013)}

## XGBoost

In [15]:
from fowt_ml import XGBoost
# Names accepted by `XGBoost`, mapped to the estimator classes they select.
XGBoost.ESTIMATOR_NAMES

{'XGBoostRegression': xgboost.sklearn.XGBRegressor}

In [16]:
model_name = "XGBoostRegression"

# Keyword arguments passed to `XGBoost` along with the model name.
params = {
    "tree_method": "hist",
    # larger takes longer
    "n_estimators": 100,
    "max_depth": 4,
}
model = XGBoost(model_name, **params)

# Same metric names as used for the other models in this notebook.
metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -5.9767374992370605,
 'neg_root_mean_squared_error': -2.0507843494415283,
 'r2': 0.513055145740509,
 'model_fit_time': np.float64(10.398)}