Ce notebook a pour objectif de tester différents hyperparamètres pour l'entraînement de nos modèles et de sélectionner les meilleurs. 
La classe Utils.megaGridSearch : 
- réalise le split *(test_size = 0.2)*
- exécute notre grid search
- entraîne le modèle avec les meilleurs hyperparamètres
- sauvegarde les informations suivantes au format joblib : 
    - 'name': model_name,
    - 'best_params': best_params,
    - 'best_model': best_model,
    - 'y_pred': y_pred,
    - 'confusion_matrix': confusion_matrix(y_test, y_pred),
    - 'classification_report': classification_report(y_test, y_pred),
    - 'execution_time_search_params': execution_time_search_params,
    - 'execution_time_training': execution_time_training,
    - 'accuracy': accuracy_score(y_test, y_pred),
    - 'precision': precision_score(y_test, y_pred),
    - 'recall': recall_score(y_test, y_pred),
    - 'f1_score': f1_score(y_test, y_pred),
    - 'auc': roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])


Nous pourrons donc utiliser ce processus pour entraîner nos modèles sur des dataframes plus spécifiques à l'avenir *(par poste de joueur / par zone / ...)*

In [None]:
import Utils
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Load the preprocessed dataset and separate the label column from the predictors.
TARGET_COLUMN = 'SHOT_MADE_FLAG'

data = pd.read_csv("../data/processed/best_player_preprocessed.csv")
target = data[TARGET_COLUMN]
features = data.drop(columns=TARGET_COLUMN)

# Search driver that every model definition below gets registered with.
mgs = Utils.megaGridSearch(name="best_player", features=features, target=target)

# Logistic Regression
# 2 penalties x 7 values of C x 2 solvers = 28 grid combinations
# (liblinear and saga both accept the l1 and l2 penalties).
lr_param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'solver': ['liblinear', 'saga'],
}
lr = Utils.modelDefinition(
    name='logistic_model',
    params=lr_param_grid,
    estimator=LogisticRegression(),
)
mgs.AddModel(lr)

# Decision tree
# 2 splitters x 6 depths x 3 split thresholds x 3 leaf sizes = 108 grid combinations.
dt_param_grid = {
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}
dt = Utils.modelDefinition(
    name='decision_tree_model',
    params=dt_param_grid,
    estimator=DecisionTreeClassifier(),
)
mgs.AddModel(dt)

# Gradient Boosting
# Four hyperparameters with two values each = 16 grid combinations.
gb_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 4],
    'subsample': [0.5, 0.6],
    'learning_rate': [0.02, 0.03],
}
gb = Utils.modelDefinition(
    name='gradient_boosting_model',
    params=gb_param_grid,
    estimator=GradientBoostingClassifier(),
)
mgs.AddModel(gb)

# XGBoost
# Six hyperparameters with two values each = 64 grid combinations.
xgb_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 4],
    'subsample': [0.5, 0.6],
    'learning_rate': [0.02, 0.03],
    'gamma': [0.1, 0.2],
    'colsample_bytree': [0.5, 0.6],
}
xgb = Utils.modelDefinition(
    name='xgboost_model',
    params=xgb_param_grid,
    estimator=XGBClassifier(),
)
mgs.AddModel(xgb)

# AdaBoost
# 3 ensemble sizes x 3 learning rates x 2 algorithms = 18 grid combinations.
# NOTE(review): 'SAMME.R' was deprecated in scikit-learn 1.4 and, as far as the
# release notes indicate, is no longer accepted from 1.6 onwards — confirm the
# pinned scikit-learn version before upgrading, or drop that option.
abm_param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'algorithm': ['SAMME', 'SAMME.R'],
}
abm = Utils.modelDefinition(
    name='adaboost_model',
    params=abm_param_grid,
    estimator=AdaBoostClassifier(),
)
mgs.AddModel(abm)


# LightGBM
# 2 x 2 x 2 x 2 searched values = 16 grid combinations; the single-value entries
# ('subsample_freq', 'objective', 'metric') are pinned settings, not searched ones.
lgb = Utils.modelDefinition( name = 'lightgbm_model',
                             params = {
                                    'n_estimators': [100, 200],
                                    'max_depth': [3, 4],
                                    'learning_rate': [0.02, 0.03],
                                    'subsample': [0.5, 0.6],
                                    # LightGBM only applies row subsampling (bagging) when
                                    # subsample_freq > 0; with the default of 0 both
                                    # 'subsample' values above would train identical models
                                    # and the search would do twice the work for nothing.
                                    'subsample_freq': [1],
                                    'objective': ['binary'],
                                    'metric': ['binary_error']},
                             estimator = LGBMClassifier())
mgs.AddModel(lgb)





In [None]:
# Launch the search on the driver configured in the previous cell.
# NOTE(review): per the notes at the top of this notebook, this presumably performs the
# train/test split, the grid search, the refit with the best hyperparameters and the
# joblib dump of the results — confirm against Utils.megaGridSearch.
mgs.run()