In [2]:
import pandas as pd
import numpy as np

from pprint import pprint

from sklearn.metrics import make_scorer, precision_score
from sklearn.model_selection import RandomizedSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

In [13]:
# -----------------------
# RANDOMGRIDSEARCH: RANDOM FOREST CLASSIFIER
# -----------------------
# https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
# https://towardsdatascience.com/fine-tuning-a-classifier-in-scikit-learn-66e048c21e65

def start_randomizedSearchCV_random_forest(X_train, y_train,
                                           n_iter=2, cv=3, verbose=20, n_jobs=3):
    """Tune a ``RandomForestClassifier`` with a randomized hyper-parameter search.

    ``n_iter`` candidates are sampled from a fixed grid of forest settings,
    each one is cross-validated and scored on precision, and the best
    candidate is refit (``refit='precision_score'``).

    Parameters
    ----------
    X_train, y_train
        Training features and labels, forwarded unchanged to
        ``RandomizedSearchCV.fit``.
        NOTE(review): ``precision_score`` is used with its default arguments
        (binary averaging), so ``y_train`` is presumably binary -- confirm.
    n_iter : int, default 2
        Number of parameter combinations sampled from the grid.
    cv : default 3
        Cross-validation setting forwarded to ``RandomizedSearchCV``.
    verbose : int, default 20
        Verbosity forwarded to ``RandomizedSearchCV``.
    n_jobs : int, default 3
        Number of parallel jobs forwarded to ``RandomizedSearchCV``
        (pass ``-1`` to use every available core).

    Returns
    -------
    tuple
        ``(best_estimator_, best_params_)`` of the fitted search.
    """
    random_grid = {
        # Number of trees in the forest.
        'n_estimators': [int(x) for x in np.linspace(start=25, stop=90, num=10)],
        # Number of features considered at every split.
        # 'auto' used to be an alias of 'sqrt' for classifiers (so the old grid
        # held two identical options); it was deprecated in scikit-learn 1.1 and
        # removed in 1.3, where it makes the candidate fail to fit.
        # TODO: consider numeric values (int / float fractions) as well.
        'max_features': ['sqrt', 'log2'],
        # Maximum number of levels in each tree.
        'max_depth': [int(x) for x in np.linspace(4, 10, num=6)],
        # Minimum number of samples required to split a node.
        'min_samples_split': [2, 5, 10],
        # Minimum number of samples required at each leaf node.
        'min_samples_leaf': [1, 2, 4],
        # Whether each tree is trained on a bootstrap sample.
        'bootstrap': [True, False],
        # Split-quality measure.
        'criterion': ['gini', 'entropy'],
    }

    # A dict of scorers (rather than a single callable) makes it easy to add
    # further metrics later; `refit` must then name the one used for ranking.
    scorers = {
        'precision_score': make_scorer(precision_score)
    }

    rf = RandomForestClassifier(random_state=0, class_weight="balanced")

    # Random search over `n_iter` combinations with `cv` cross-validation,
    # run on `n_jobs` workers. random_state=0 fixes which candidates are drawn.
    rf_random = RandomizedSearchCV(estimator=rf,
                                   param_distributions=random_grid,
                                   n_iter=n_iter,
                                   cv=cv,
                                   verbose=verbose,
                                   random_state=0,
                                   n_jobs=n_jobs,
                                   scoring=scorers,
                                   refit='precision_score')
    # Fit the random search model.
    rf_random.fit(X_train, y_train)

    # Best model + best params.
    return rf_random.best_estimator_, rf_random.best_params_

In [None]:
# -----------------------
# RANDOMGRIDSEARCH: MLPC CLASSIFIER
# -----------------------
def start_randomizedSearchCV_mlpc(X_train, y_train,
                                  n_iter=2, cv=3, verbose=20, n_jobs=3, max_iter=100):
    """Tune an ``MLPClassifier`` with a randomized hyper-parameter search.

    ``n_iter`` candidates are sampled from a fixed grid of network settings,
    each one is cross-validated and scored on precision, and the best
    candidate is refit (``refit='precision_score'``).

    Parameters
    ----------
    X_train, y_train
        Training features and labels, forwarded unchanged to
        ``RandomizedSearchCV.fit``.
        NOTE(review): ``precision_score`` is used with its default arguments
        (binary averaging), so ``y_train`` is presumably binary -- confirm.
    n_iter : int, default 2
        Number of parameter combinations sampled from the grid.
    cv : default 3
        Cross-validation setting forwarded to ``RandomizedSearchCV``.
    verbose : int, default 20
        Verbosity forwarded to ``RandomizedSearchCV``.
    n_jobs : int, default 3
        Number of parallel jobs forwarded to ``RandomizedSearchCV``
        (pass ``-1`` to use every available core).
    max_iter : int, default 100
        ``max_iter`` given to every ``MLPClassifier`` candidate.

    Returns
    -------
    tuple
        ``(best_estimator_, best_params_)`` of the fitted search.
    """
    # TODO: parameterize this grid properly.
    # NOTE(review): per the scikit-learn docs 'learning_rate' only has an
    # effect with solver='sgd'; for the other solvers the two values yield
    # duplicate candidates -- confirm whether that is intended.
    random_grid = {
        'hidden_layer_sizes': [(130, 130), (130, 130, 130), (100,)],
        'activation': ['tanh', 'relu'],
        'solver': ['lbfgs', 'sgd', 'adam'],
        'alpha': [0.0001, 0.05],
        'learning_rate': ['constant', 'adaptive'],
    }

    scorers = {
        'precision_score': make_scorer(precision_score)
    }

    # Seed the network itself: without random_state the weight initialisation
    # is unseeded, so the "best" parameters could change from run to run even
    # though the search below is seeded.
    mlp = MLPClassifier(max_iter=max_iter, random_state=0)
    mlpc_random = RandomizedSearchCV(estimator=mlp,
                                     param_distributions=random_grid,
                                     n_iter=n_iter,
                                     cv=cv,
                                     verbose=verbose,
                                     random_state=0,
                                     n_jobs=n_jobs,
                                     scoring=scorers,
                                     refit='precision_score')

    mlpc_random.fit(X_train, y_train)
    return mlpc_random.best_estimator_, mlpc_random.best_params_

In [None]:
# -----------------------
# RANDOMGRIDSEARCH: XGBOOST CLASSIFIER
# -----------------------
# https://xgboost.readthedocs.io/en/latest/parameter.html
# http://danielhnyk.cz/how-to-use-xgboost-in-python/
def start_randomizedSearchCV_xgboost(X_train, y_train,
                                     n_iter=2, cv=3, verbose=20, n_jobs=3):
    """Tune an ``XGBClassifier`` with a randomized hyper-parameter search.

    ``n_iter`` candidates are sampled from a fixed grid of booster settings,
    each one is cross-validated and scored on precision, and the best
    candidate is refit (``refit='precision_score'``).

    Parameters
    ----------
    X_train, y_train
        Training features and labels, forwarded unchanged to
        ``RandomizedSearchCV.fit``.
    n_iter : int, default 2
        Number of parameter combinations sampled from the grid.
    cv : default 3
        Cross-validation setting forwarded to ``RandomizedSearchCV``.
    verbose : int, default 20
        Verbosity forwarded to ``RandomizedSearchCV``.
    n_jobs : int, default 3
        Number of parallel jobs forwarded to ``RandomizedSearchCV``
        (pass ``-1`` to use every available core).

    Returns
    -------
    tuple
        ``(best_estimator_, best_params_)`` of the fitted search.
    """
    # TODO: parameterize this grid properly.
    # NOTE(review): max_depth, min_child_weight and the colsample_* options
    # look tree-specific and are presumably ignored when booster='gblinear'
    # is sampled -- confirm against the XGBoost parameter docs.
    search_space = {
        # 20 evenly spaced values in [20, 500], truncated to integers.
        'n_estimators': np.linspace(20, 500, 20).astype(int).tolist(),
        # 20 evenly spaced values in [2, 30], truncated to integers.
        'max_depth': np.linspace(2, 30, 20).astype(int).tolist(),
        'booster': ['gbtree', 'gblinear', 'dart'],
        'min_child_weight': [2, 3, 4, 5],
        'colsample_bytree': [0.2, 0.6, 0.8],
        'colsample_bylevel': [0.2, 0.6, 0.8],
    }

    # Single-metric dict; `refit` names the entry used to rank candidates.
    scoring = {'precision_score': make_scorer(precision_score)}

    search = RandomizedSearchCV(
        estimator=XGBClassifier(),
        param_distributions=search_space,
        n_iter=n_iter,
        cv=cv,
        verbose=verbose,
        random_state=0,
        n_jobs=n_jobs,
        scoring=scoring,
        refit='precision_score',
    )
    search.fit(X_train, y_train)

    return search.best_estimator_, search.best_params_