In [30]:
# models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

import json

## Models list:
- LogisticRegression(),
- SGDClassifier(),
- DecisionTreeClassifier(),
- RandomForestClassifier(),
- GradientBoostingClassifier(),
- ExtraTreesClassifier(),
- AdaBoostClassifier(),
- SVC(),
- GaussianNB(),
- MLPClassifier()
- KNeighborsClassifier()
- XGBClassifier()

In [31]:
grid_search_dict = {}

### LogisticRegression

In [32]:
logreg = LogisticRegression()
logreg.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [33]:
logreg_params = {
    'C':[1.0, 0.8, 0.6],
    'penalty': ['l2','l1'],
    'random_state': [42, 1453],
    'solver': ['saga','liblinear']
}

grid_search_dict[type(logreg).__name__] = logreg_params

### SGDClassifier

In [34]:
sgd = SGDClassifier()

sgd.get_params()

{'alpha': 0.0001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 1000,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l2',
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [35]:
sgd_params = {
    'alpha':[0.0001, 0.001, 0.01],
    'penalty': ['l2','l1'],
    'random_state': [42, 1453],
    'loss': ['hinge', 'perceptron']
}

grid_search_dict[type(sgd).__name__] = sgd_params

### DecisionTreeClassifier

In [36]:
tree = DecisionTreeClassifier()
tree.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

In [37]:
tree_params = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'splitter' : ['best', 'random'],
    'max_depth': [None, 100, 10],
}

grid_search_dict[type(tree).__name__] = tree_params

### RandomForestClassifier

In [38]:
forest = RandomForestClassifier()
forest.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [39]:
forest_params = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'n_estimators': [100, 200, 300]
}

grid_search_dict[type(forest).__name__] = forest_params

###### GradientBoostingClassifier

In [40]:
gbs = GradientBoostingClassifier()
gbs.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.1,
 'loss': 'deviance',
 'max_depth': 3,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_iter_no_change': None,
 'random_state': None,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [41]:
gbs_params = {
    'criterion': ['friedman_mse'],
    'n_estimators': [100, 200, 300]
}

grid_search_dict[type(gbs).__name__] = gbs_params

### ExtraTreesClassifier

In [42]:
etc = ExtraTreesClassifier()
etc.get_params()

{'bootstrap': False,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [43]:
etc_params = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'n_estimators': [100, 200, 300]
}

grid_search_dict[type(etc).__name__] = etc_params

### AdaBoostClassifier

In [44]:
abc = AdaBoostClassifier()
abc.get_params()

{'algorithm': 'SAMME.R',
 'base_estimator': None,
 'learning_rate': 1.0,
 'n_estimators': 50,
 'random_state': None}

In [45]:
abc_params = {
    'algorithm': ['SAMME.R', 'SAMME'],
    'n_estimators': [50, 100, 200]
}

grid_search_dict[type(abc).__name__] = abc_params

### SVC

In [46]:
svc = SVC()
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [47]:
svc_params = {
    'C': [1.0, 2.0],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

grid_search_dict[type(svc).__name__] = svc_params

### GaussianNB

In [48]:
gnb = GaussianNB()
gnb.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [49]:
gnb_params = {
    'var_smoothing': [1e-09, 1e-08, 1e-07]
}

grid_search_dict[type(gnb).__name__] = gnb_params

### MLPClassifier

In [50]:
mlp = MLPClassifier()
mlp.get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [51]:
mlp_params = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [100, 200]
}

grid_search_dict[type(mlp).__name__] = mlp_params

In [52]:
grid_search_dict

{'LogisticRegression': {'C': [1.0, 0.8, 0.6],
  'penalty': ['l2', 'l1'],
  'random_state': [42, 1453],
  'solver': ['saga', 'liblinear']},
 'SGDClassifier': {'alpha': [0.0001, 0.001, 0.01],
  'penalty': ['l2', 'l1'],
  'random_state': [42, 1453],
  'loss': ['hinge', 'perceptron']},
 'DecisionTreeClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'splitter': ['best', 'random'],
  'max_depth': [None, 100, 10]},
 'RandomForestClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'n_estimators': [100, 200, 300]},
 'GradientBoostingClassifier': {'criterion': ['friedman_mse'],
  'n_estimators': [100, 200, 300]},
 'ExtraTreesClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'n_estimators': [100, 200, 300]},
 'AdaBoostClassifier': {'algorithm': ['SAMME.R', 'SAMME'],
  'n_estimators': [50, 100, 200]},
 'SVC': {'C': [1.0, 2.0], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
 'GaussianNB': {'var_smoothing': [1e-09, 1e-08, 1e-07]},
 'MLPClassifier': {'activation': 

## KNeighborsClassifier

In [53]:
KNC = KNeighborsClassifier()
KNC.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [54]:
KNC_params = {
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2]
}

grid_search_dict[type(KNC).__name__] = KNC_params

## XGBClassifier()

In [55]:
xgb = XGBClassifier()
xgb.get_params()

{'objective': 'binary:logistic',
 'use_label_encoder': None,
 'base_score': None,
 'booster': None,
 'callbacks': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': None,
 'feature_types': None,
 'gamma': None,
 'gpu_id': None,
 'grow_policy': None,
 'importance_type': None,
 'interaction_constraints': None,
 'learning_rate': None,
 'max_bin': None,
 'max_cat_threshold': None,
 'max_cat_to_onehot': None,
 'max_delta_step': None,
 'max_depth': None,
 'max_leaves': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'n_estimators': 100,
 'n_jobs': None,
 'num_parallel_tree': None,
 'predictor': None,
 'random_state': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'sampling_method': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': None,
 'validate_parameters': None,
 'verbosity': None}

In [56]:
xgb_params = {
    'booster': ['gbtree', 'gblinear', 'dart'],
    }

grid_search_dict[type(xgb).__name__] = xgb_params

# Export to json

In [57]:
json_object = json.dumps(grid_search_dict, indent=4)
with open('data/grid_search_params.json', 'w') as file:
    file.write(json_object)

In [58]:
with open ('data/grid_search_params.json', 'r') as j:
    grid_search_dict = json.load (j)
grid_search_dict

{'LogisticRegression': {'C': [1.0, 0.8, 0.6],
  'penalty': ['l2', 'l1'],
  'random_state': [42, 1453],
  'solver': ['saga', 'liblinear']},
 'SGDClassifier': {'alpha': [0.0001, 0.001, 0.01],
  'penalty': ['l2', 'l1'],
  'random_state': [42, 1453],
  'loss': ['hinge', 'perceptron']},
 'DecisionTreeClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'splitter': ['best', 'random'],
  'max_depth': [None, 100, 10]},
 'RandomForestClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'n_estimators': [100, 200, 300]},
 'GradientBoostingClassifier': {'criterion': ['friedman_mse'],
  'n_estimators': [100, 200, 300]},
 'ExtraTreesClassifier': {'criterion': ['gini', 'entropy', 'log_loss'],
  'n_estimators': [100, 200, 300]},
 'AdaBoostClassifier': {'algorithm': ['SAMME.R', 'SAMME'],
  'n_estimators': [50, 100, 200]},
 'SVC': {'C': [1.0, 2.0], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
 'GaussianNB': {'var_smoothing': [1e-09, 1e-08, 1e-07]},
 'MLPClassifier': {'activation': 