# Experiments Management
This notebook shows how to use the [experiments_management.py](https://github.com/abreukuse/ml_utilities/blob/master/experiments_management.py) module. Its `experiment_manager` function tracks and records the results of machine learning experiments with the help of [mlflow](https://mlflow.org/docs/latest/index.html).<br>
It can try several hyperparameter configurations, and for both classification and regression tasks you can choose between a simple train/test split of the data and cross-validation.

In [1]:
# requirements:

# feature_engine==1.0.2
# scikit-learn==0.24.1
# numpy==1.19.3
# pandas==1.2.3

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer, load_diabetes
from feature_engine.wrappers import SklearnTransformerWrapper
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, FunctionTransformer
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer, roc_auc_score, log_loss
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, mean_squared_log_error
from experiments_management import experiment_manager

## Classification

In [3]:
# Load scikit-learn's bundled breast cancer dataset; the cells below read its
# `data`, `target` and `feature_names` attributes.
data = load_breast_cancer()

In [4]:
# Feature matrix as a DataFrame so every column carries its feature name.
X = pd.DataFrame(data.data, columns=data.feature_names)
X.head()  # preview the first rows

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
# Target labels as a Series aligned with the rows of X.
y = pd.Series(data.target)
y.head()  # preview the first labels

0    0
1    0
2    0
3    0
4    0
dtype: int32

### One Experiment

In [6]:
# Shared configuration for every experiment in this notebook.
RANDOM_STATE = 77
TEST_SIZE = 0.3  # passed as `test_size` to the simple-split experiments

# The hyperparameter samplers in this notebook draw from NumPy's global RNG
# through np.random.choice. Seed it here so that "Restart & Run All" samples
# the same configurations every time.
np.random.seed(RANDOM_STATE)

In [7]:
# Two-step pipeline: a wrapped scaler followed by logistic regression.
# StandardScaler is only the initial choice; the search space defined below
# can replace it through the 'sklearntransformerwrapper__transformer' key.
pipeline_logistic_regression = make_pipeline(SklearnTransformerWrapper(transformer=StandardScaler()),
                                             LogisticRegression())

In [8]:
# Setting the hyperparameters search space for logistic regression

def runs_logistic_regression():
    """Sample one hyperparameter configuration for the logistic regression pipeline.

    Every call draws a fresh value for each tunable setting with
    ``np.random.choice``; the estimator's random state is always
    ``RANDOM_STATE``.

    Returns
    -------
    dict
        Sampled values keyed by ``<pipeline step>__<parameter>`` names.
    """
    pick = np.random.choice

    # The values are evaluated top to bottom, so the draws happen in the
    # order C, max_iter, solver, transformer.
    return {
        'logisticregression__C': pick([0.001, 0.01, 0.1, 1, 10, 100, 1000]),
        'logisticregression__max_iter': pick(range(3000, 10500, 1000)),
        'logisticregression__solver': pick(['lbfgs', 'sag', 'saga']),
        'sklearntransformerwrapper__transformer': pick([StandardScaler(), MinMaxScaler()]),
        'logisticregression__random_state': RANDOM_STATE,
    }

In [9]:
# Classification metrics, keyed by the name under which each score is reported
# (the run output prefixes these names with "train_" / "test_").
metrics = {
    'precision': precision_score,
    'recall': recall_score,
    'f1_score': f1_score,
    'accuracy': accuracy_score,
    'auc': roc_auc_score,
    'log_loss': log_loss,
}

#### Simple Split

In [10]:
# Three runs of the logistic-regression pipeline, each with hyperparameters
# sampled by `runs_logistic_regression` and scored on a single train/test split.
# `test_size` is presumably the held-out fraction — confirm in experiment_manager.
experiment_manager(task='classification',
                   pipeline=pipeline_logistic_regression, 
                   X=X, 
                   y=y, 
                   runs=3,
                   validation='simple_split',
                   hyperparameters=runs_logistic_regression,
                   metrics=metrics,
                   random_state=RANDOM_STATE,
                   test_size=TEST_SIZE)

Experiment Name: LogisticRegression
Experiment_id: 1

Run 1

HYPERPARAMETERS
C: 1.0
max_iter: 5000
solver: lbfgs
transformer: MinMaxScaler()
random_state: 77

SCORES
train_precision: 0.929
test_precision: 0.932
train_recall: 1.0
test_recall: 1.0
train_f1_score: 0.963
test_f1_score: 0.965
train_accuracy: 0.952
test_accuracy: 0.953
train_auc: 0.993
test_auc: 0.995
train_log_loss: 0.188
test_log_loss: 0.177

Run 2

HYPERPARAMETERS
C: 1000.0
max_iter: 10000
solver: lbfgs
transformer: MinMaxScaler()
random_state: 77

SCORES
train_precision: 0.954
test_precision: 0.956
train_recall: 0.996
test_recall: 1.0
train_f1_score: 0.974
test_f1_score: 0.978
train_accuracy: 0.967
test_accuracy: 0.971
train_auc: 0.994
test_auc: 0.993
train_log_loss: 0.139
test_log_loss: 0.131

Run 3

HYPERPARAMETERS
C: 0.001
max_iter: 5000
solver: lbfgs
transformer: StandardScaler()
random_state: 77

SCORES
train_precision: 0.988
test_precision: 0.973
train_recall: 0.992
test_recall: 1.0
train_f1_score: 0.99
test_f1_sco

#### Cross Validation

In [None]:
# Same search as the simple split, but validated with the shuffled 2-fold
# KFold splitter passed as `cv_method`.
# NOTE(review): `n_splits=2` repeats the value already set on the KFold
# object — confirm whether experiment_manager needs both.
experiment_manager(task='classification',
                   pipeline=pipeline_logistic_regression, 
                   X=X, 
                   y=y, 
                   runs=3,
                   validation='cross_validation',
                   hyperparameters=runs_logistic_regression,
                   metrics=metrics,
                   random_state=RANDOM_STATE,
                   cv_method=KFold(n_splits=2, shuffle=True, random_state=RANDOM_STATE),
                   n_splits=2)

Experiment Name: LogisticRegression
Experiment_id: 1

Run 1

HYPERPARAMETERS
C: 0.001
max_iter: 5000
solver: lbfgs
transformer: MinMaxScaler()
random_state: 77

SCORES
train_precision: 0.989
test_precision: 0.969
train_recall: 0.997
test_recall: 0.978
train_f1_score: 0.993
test_f1_score: 0.974
train_accuracy: 0.991
test_accuracy: 0.967
train_auc: 0.998
test_auc: 0.994
train_log_loss: 0.043
test_log_loss: 0.085



### More than one experiment

In [None]:
# Second candidate model: a decision tree classifier, to be run alongside the
# logistic regression pipeline. No scaling step is included in this pipeline.
pipeline_decision_tree = make_pipeline(DecisionTreeClassifier())

In [None]:
# Search space for decision trees

def runs_decision_tree():
    """Sample one hyperparameter configuration for the decision tree classifier pipeline.

    Each tunable setting is drawn independently with ``np.random.choice``;
    the tree's random state is always ``RANDOM_STATE``.

    Returns
    -------
    dict
        Sampled values keyed by ``decisiontreeclassifier__<parameter>``.
    """
    draw = np.random.choice
    sample_counts = [5, 8, 10, 12, 15, 20]  # candidates for both min_samples_* settings

    # Values are evaluated in listing order, which fixes the order of the draws.
    return {
        'decisiontreeclassifier__max_depth': draw(range(2, 16, 2)),
        'decisiontreeclassifier__min_samples_split': draw(sample_counts),
        'decisiontreeclassifier__min_samples_leaf': draw(sample_counts),
        'decisiontreeclassifier__max_features': draw(np.arange(0.1, 1.1, 0.1)),
        'decisiontreeclassifier__min_impurity_decrease': draw(np.arange(0.1, 0.6, 0.1)),
        'decisiontreeclassifier__class_weight': draw([None, 'balanced']),
        'decisiontreeclassifier__random_state': RANDOM_STATE,
    }

In [None]:
# Classification metrics for the multi-experiment loop: (reported name, scorer) pairs.
metrics = dict([
    ('precision', precision_score),
    ('recall', recall_score),
    ('f1_score', f1_score),
    ('accuracy', accuracy_score),
    ('auc', roc_auc_score),
    ('log_loss', log_loss),
])

In [None]:
# Pair each pipeline (logistic regression, decision tree) with the function
# that samples its hyperparameters; the loop below runs one experiment per pair.

experiments = ((pipeline_logistic_regression, runs_logistic_regression),
               (pipeline_decision_tree, runs_decision_tree))

In [None]:
# One experiment per (pipeline, sampler) pair: five sampled runs each,
# validated on a simple split with the shared seed and test size.
for pipeline, hyperparameters in experiments:
    experiment_manager(task='classification',
                       pipeline=pipeline, X=X, y=y, 
                       runs=5,
                       validation='simple_split',
                       hyperparameters=hyperparameters,
                       metrics=metrics,
                       random_state=RANDOM_STATE,
                       test_size=TEST_SIZE)

## Regression

In [None]:
# Load scikit-learn's bundled diabetes dataset for the regression examples.
# This rebinds `data`, which held the breast cancer dataset until now.
data = load_diabetes()

In [None]:
# Regression feature matrix; replaces the classification `X` defined earlier.
X = pd.DataFrame(data.data, columns=data.feature_names)
X.head()  # preview the first rows

In [None]:
# Regression target; replaces the classification `y` defined earlier.
y = pd.Series(data.target)
y.head()  # preview the first target values

In [None]:
# Single-step pipelines for the two regression models to compare.
ridge = make_pipeline(Ridge())
decision_trees = make_pipeline(DecisionTreeRegressor())

In [None]:
# hyperparameter space:

# ridge
def runs_ridge():
    """Sample one hyperparameter configuration for the Ridge pipeline.

    ``alpha``, ``max_iter`` and ``solver`` are each drawn with
    ``np.random.choice``; the random state is always ``RANDOM_STATE``.

    Returns
    -------
    dict
        Sampled values keyed by ``ridge__<parameter>``.
    """
    sample = np.random.choice

    config = {}
    config['ridge__alpha'] = sample([0.01, 0.1, 1, 10, 100])
    config['ridge__max_iter'] = sample(range(1000, 10500, 1000))
    config['ridge__solver'] = sample(['cholesky', 'sag', 'saga', 'svd', 'auto'])
    config['ridge__random_state'] = RANDOM_STATE
    return config

# decision tree
def runs_decision_tree():
    """Sample one hyperparameter configuration for the decision tree regressor pipeline.

    Each tunable setting is drawn independently with ``np.random.choice``;
    the tree's random state is always ``RANDOM_STATE``.

    NOTE(review): this rebinds ``runs_decision_tree``, the name the
    classification section uses for its classifier sampler. Re-running the
    classification cells after this one would pick up this regressor version;
    consider a distinct name.

    Returns
    -------
    dict
        Sampled values keyed by ``decisiontreeregressor__<parameter>``.
    """
    rand_pick = np.random.choice

    names = ('decisiontreeregressor__max_depth',
             'decisiontreeregressor__min_samples_split',
             'decisiontreeregressor__min_samples_leaf',
             'decisiontreeregressor__max_features',
             'decisiontreeregressor__min_impurity_decrease',
             'decisiontreeregressor__random_state')

    # Tuple elements are evaluated left to right, so the draws follow the
    # same order as the names above.
    values = (rand_pick(range(2, 16, 2)),
              rand_pick([5, 8, 10, 12, 15, 20]),
              rand_pick([5, 8, 10, 12, 15, 20]),
              rand_pick(np.arange(0.1, 1.1, 0.1)),
              rand_pick(np.arange(0.1, 0.6, 0.1)),
              RANDOM_STATE)

    return dict(zip(names, values))

In [None]:
# Regression experiments: each pipeline paired with its hyperparameter sampler.
# This rebinds `experiments`, which held the classification pairs until now.
experiments = ((ridge, runs_ridge),
               (decision_trees, runs_decision_tree))

### Simple Split

In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    scikit-learn's ``mean_squared_error`` returns the *squared* error by
    default, so its square root is taken here to match the 'rmse' label.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Regression metrics, keyed by the name under which each score is reported.
metrics = {'rmse': rmse,
           'mae': mean_absolute_error,
           'mape': mean_absolute_percentage_error,
           'msle': mean_squared_log_error}

In [None]:
# One experiment per regression (pipeline, sampler) pair: five sampled runs
# each, validated on a simple split with the shared seed and test size.
for pipeline, hyperparameters in experiments:
    experiment_manager(task='regression',
                       pipeline=pipeline, X=X, y=y, 
                       runs=5,
                       validation='simple_split',
                       hyperparameters=hyperparameters,
                       metrics=metrics,
                       random_state=RANDOM_STATE,
                       test_size=TEST_SIZE)

### Cross Validation

In [None]:
# If the target is transformed, the inverse transformation must be supplied as
# a function through the `inverse` keyword argument. Here the target is
# log-transformed with np.log1p, so np.expm1 (its inverse) is passed.
# NOTE(review): presumably predictions are mapped back to the original scale
# before scoring — confirm in experiment_manager.

y_log = np.log1p(y)

# Each run is validated with a 2-fold split repeated twice.
for pipeline, hyperparameters in experiments:
    experiment_manager(task='regression',
                       pipeline=pipeline, X=X, y=y_log, 
                       runs=5,
                       validation='cross_validation',
                       cv_method=RepeatedKFold(n_splits=2, n_repeats=2, random_state=RANDOM_STATE),
                       hyperparameters=hyperparameters,
                       metrics=metrics,
                       inverse=np.expm1)