# How to run a model using `fowt_ml` and calculate scores

## Data preparation (common for any type of model)

In [1]:
from fowt_ml.config import read_yaml
from fowt_ml.datasets import get_data
from sklearn.model_selection import train_test_split

In [2]:
# Location of the example YAML configuration; this is a relative path, so it
# only resolves when the notebook is run from its own directory.
example_config_file = "../../src/example_config.yml"

In [3]:
import os

# Load the example configuration, then point the "exp699" experiment at a local
# copy of its .mat file before reading the data.
config = read_yaml(example_config_file)

# The .mat location is machine-specific: set the FOWT_ML_EXP699_MAT environment
# variable to override it instead of editing this cell. When the variable is
# unset, the original hard-coded path is used, so existing behaviour is kept.
exp699_mat_file = os.environ.get(
    "FOWT_ML_EXP699_MAT",
    "/home/sarah/temp/hybridlabs/data_example/exp699.mat",
)
config["data"]["exp699"]["mat_file"] = exp699_mat_file
df = get_data("exp699", config["data"])

In [4]:
# Train/test split, using the predictor and target labels listed under
# "ml_setup" in the config.
ml_setup = config["ml_setup"]
predictors_labels = ml_setup["predictors"]
target_labels = ml_setup["targets"]

X_data, Y_data = df[predictors_labels], df[target_labels]

# 25% of the samples are held out for testing; the fixed random_state makes the
# shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.25,
    shuffle=True,
    random_state=123,
)

## Linear models 

In [9]:
from fowt_ml.linear_models import LinearModels
# Show the estimator names LinearModels accepts and the class each one maps to.
LinearModels.ESTIMATOR_NAMES

{'LinearRegression': sklearn.linear_model._base.LinearRegression,
 'RidgeRegression': sklearn.linear_model._ridge.Ridge,
 'LassoRegression': sklearn.linear_model._coordinate_descent.Lasso,
 'ElasticNetRegression': sklearn.linear_model._coordinate_descent.ElasticNet,
 'LeastAngleRegression': sklearn.linear_model._least_angle.Lars}

In [10]:
# Calculate metrics for one linear estimator; the name must be one of the keys
# of LinearModels.ESTIMATOR_NAMES.
model_name = "LeastAngleRegression"
model = LinearModels(model_name)

metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -7.142729846925139,
 'neg_root_mean_squared_error': -2.262318094629433,
 'r2': 0.4049439308312828,
 'model_fit_time': np.float64(0.11)}

## Random Forest

In [1]:
from fowt_ml.ensemble import EnsembleModel
# Show the estimator names EnsembleModel accepts and the class each one maps to.
EnsembleModel.ESTIMATOR_NAMES

{'ExtraTrees': sklearn.ensemble._forest.ExtraTreesRegressor,
 'RandomForest': sklearn.ensemble._forest.RandomForestRegressor}

In [7]:
# Calculate metrics for a random forest on the same train/test split.
model_name = "RandomForest"
model = EnsembleModel(
    estimator=model_name,
    max_depth=9,
    bootstrap=True,
    max_samples=10_000,
    n_estimators=50,
)

# Declare the metrics in this cell so it does not rely on a `metrics` variable
# left over from another section of the notebook.
metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -7.030263273594386,
 'neg_root_mean_squared_error': -2.2297171541025516,
 'r2': 0.43169729068927937,
 'model_fit_time': np.float64(5.73)}

In [9]:
# Use out-of-bag samples to estimate the generalization score. Only the
# training data is passed, so the held-out test set is not needed here.
# `model` is the EnsembleModel built in the previous cell.
# NOTE(review): presumably this needs the forest to be created with
# bootstrap=True (as it is above) — confirm against fowt_ml.
score = model.oob_score(X_train, y_train, scoring="neg_root_mean_squared_error")
score



-2.2322728182947276

## Gaussian Process

In [10]:
from fowt_ml.gaussian_process import SparseGaussianModel
# Show the estimator names SparseGaussianModel accepts and the class each one maps to.
SparseGaussianModel.ESTIMATOR_NAMES

{'SklearnGPRegressor': fowt_ml.gaussian_process.SklearnGPRegressor}

In [11]:
# Calculate metrics for the sparse Gaussian-process model on the same
# train/test split.
model_name = "SklearnGPRegressor"
params = dict(
    num_inducing=100,  # larger values take longer
    num_latents=3,
)
model = SparseGaussianModel(model_name, **params)

metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores

{'neg_mean_squared_error': -9.100683239392092,
 'neg_root_mean_squared_error': -2.50777337759925,
 'r2': 0.3374002291651839,
 'model_fit_time': np.float64(93.17)}

## MLP

In [5]:
from fowt_ml.neural_network import NN
# Show the estimator names NN accepts and the class each one maps to.
NN.ESTIMATOR_NAMES

{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor}

In [6]:
# Calculate metrics for a multilayer perceptron on the same train/test split.
model_name = "MultilayerPerceptron"
params = dict(
    hidden_layer_sizes=10,  # larger values take longer
    max_iter=10,
)
model = NN(model_name, **params)

metrics = [
    "neg_mean_squared_error",
    "neg_root_mean_squared_error",
    "r2",
    "model_fit_time",
]
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores



{'neg_mean_squared_error': -7.289962257367833,
 'neg_root_mean_squared_error': -2.3049256494653982,
 'r2': 0.3319488674638264,
 'model_fit_time': np.float64(8.66)}