### Class weights

In [None]:
# Candidate class_weight dictionaries: the weight of class 0 sweeps 100 evenly
# spaced values from 0.0 to 0.99, and class 1 receives the complement (1.0 - w0).
[{0: w0, 1: 1.0 - w0} for w0 in np.linspace(0.0, 0.99, 100)]

# KNN

In [None]:
# KNN
%%time

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
 


params = [
    {'knn__algorithm': ['ball_tree', 'kd_tree', 'brute'], 
     'knn__leaf_size': np.arange(5,55,5),
     'knn__n_neighbors': np.arange(1,11,1), 
     'knn__p': [1,2], 
     'knn__weights': ['uniform', 'distance'],
     'knn__metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
]

knn = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('knn', KNeighborsClassifier())
])

tuning = 1

while tuning <=3:
    grid_search = GridSearchCV(estimator = knn, 
                               param_grid = params, 
                               cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                               scoring = 'f1', n_jobs = -1) #To use all processors

    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning +=1

# Support Vector Machine

In [None]:
%%time 
#KERNAL SVC

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

params = [
    {'svc__C': np.arange(1.0, 11.1,1), 
     'svc__kernel': ['rbf'],
     'svc__gamma': ['scale', 'auto'],
     'svc__probability': [True], 
     'svc__class_weight': ['balanced']}, 
#     {'svc__C': np.arange(1.0, 11.1,1), 
#      'svc__kernel': ['poly'], 
#      'svc__degree': np.arange(1,7,1), 
#      'svc__gamma': ['scale', 'auto'], 
#      'svc__coef0': [0.0, 0.01, 0.1, 0.2], 
#      'svc__class_weight': ['balanced'], 
#      'svc__probability': [True]},
    {'svc__C': np.arange(1.0, 11.1,1), 
     'svc__kernel': ['sigmoid'], 
     'svc__gamma': ['scale', 'auto'], 
     'svc__coef0': [0.0, 0.01, 0.1, 0.2], 
     'svc__probability': [True], 
     'svc__class_weight': ['balanced']}, 
    {'svc__C': np.arange(1.0, 11.1,1), 
     'svc__kernel': ['linear'], 
     'svc__probability': [True], 
     'svc__class_weight': ['balanced']}
]

svc = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('svc', SVC())
])

grid_search = RandomizedSearchCV(estimator = svc, 
                                 param_distributions = params, 
                                 cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                                 scoring = 'roc_auc', 
                                 n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# Logistic Regression

In [None]:
%%time

# For Logistic Regression

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression

params = [
    {'logit_C': np.arange(0.1,10,0.1), 
     'logit_class_weight': [{0:x, 1:1.0-x} for x in np.linspace(0.0,0.99,100)], 
     'logit_penalty': ['l1', 'l2', 'elasticnet', 'none'], 
     'logit_solver': ['newton-cg', 'lbfgs', 'sag', 'saga']},
    {'logit_C': np.arange(0.1,10,0.1),
     'logit_class_weight': [{0:x, 1:1.0-x} for x in np.linspace(0.0,0.99,100)],
     'logit_penalty': ['l1', 'l2', 'elasticnet'], 
     'logit_solver': ['liblinear']}
]

logit = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('logit', LogisticRegression())
])

grid_search = GridSearchCV(estimator = logit, 
                                 param_grid= params, 
                                 cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                                 scoring = 'f1', 
                                 n_jobs = -1) #To use all processors
tuning = 1
while tuning <=5:
    
    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print('Logistic Regression Params:')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning +=1

# GaussianProcessClassifier

In [None]:
%%time

# For Logistic Regression

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold

from sklearn.gaussian_process.kernels import (
    RBF,
    DotProduct, 
    Matern, 
    RationalQuadratic,
    WhiteKernel, 
    ExpSineSquared 
)

params = [
    {'gpc__kernel': [1*RBF(), 1*DotProduct(), 1*Matern(),  1*RationalQuadratic(), 1*WhiteKernel(),
                     1*ExpSineSquared()]}
]

gpc = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('gpc', GaussianProcessClassifier())
])

grid_search = GridSearchCV(estimator = gpc, 
                                 param_grid= params, 
                                 cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                                 scoring = 'roc_auc', n_jobs = -1) #To use all processors
tuning = 1
while tuning <2:
    
    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print('Logistic Regression Params:')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning +=1

# LogisticRegressionCV

In [None]:
%%time

# For Logistic Regression

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression

params = [
    {'logit_CV__Cs': np.arange(0,30,1), 
     'logit_CV__cv': [10],
     'logit_CV__class_weight': ['balanced'], 
     'logit_CV__penalty': ['l1'], 
     'logit_CV__solver': ['liblinear', 'saga']},
    {'logit_CV__Cs': np.arange(0,30,1),
     'logit_CV__cv': [10],
     'logit_CV__class_weight': ['balanced'],
     'logit_CV__penalty': ['l2'], 
     'logit_CV__solver': ['newton-cg', 'lbfgs', 'sag']}, 
    {'logit_CV__Cs': np.arange(0,30,1),
     'logit_CV__cv': [10],
     'logit_CV__class_weight': ['balanced'],
     'logit_CV__penalty': ['elasticnet'], 
     'logit_CV__solver': ['saga'], 
     'logit_CV__l1_ratios': [0.5]}
]

logit_cv = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('logit_CV', LogisticRegressionCV(scoring = 'roc_auc'))
])


grid_search = RandomizedSearchCV(estimator = logit_cv, 
                                 param_distributions= params, 
                                 cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                                 scoring = 'roc_auc', n_jobs = -1) #To use all processors
tuning = 1
while tuning <=5:
    
    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print('Logistic Regression Params:')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning +=1

# Gaussian NB

In [None]:
%%time

# For Logistic Regression

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold

params = [
    {'gnb__var_smoothing': np.logspace(0,-9, num=100)}
]

gnb = Pipeline(steps = [
    ('scaler', StandardScaler()), 
    ('gnb', GaussianNB())
])

grid_search = GridSearchCV(estimator = gnb, 
                                 param_grid= params, 
                                 cv = StratifiedKFold(n_splits=10, random_state=seed, shuffle = True), 
                                 scoring = 'roc_auc', n_jobs = -1) #To use all processors
tuning = 1
while tuning <2:
    
    grid_search.fit(X_train_pars, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print('Logistic Regression Params:')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning +=1

# Random Forest

In [None]:
#Random Forest
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier

params = [
    {'n_estimators': np.arange(50,1050,50), 
     'criterion': ['gini', 'entropy'], 
     'max_depth': np.arange(1,7,1), 
     'min_samples_split': [2,3,10], 
     'min_samples_leaf': [1,2,10], 
     'max_features': ['auto', 'sqrt', 'log2'], 
     'ccp_alpha': [0.0, 0.001, 0.015,0.035], 
     'bootstrap':[True, False], 
     'class_weight': ["balanced", "balanced_subsample"]}
]

grid_search = RandomizedSearchCV(estimator = RandomForestClassifier(), 
                           param_distributions = params, cv = skf, 
                           scoring = 'f1', n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# Balanced Random Forest Classifier

In [None]:
#Balanced Random Forest Classifier
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from imblearn.ensemble import BalancedRandomForestClassifier

params = [
    {'n_estimators': np.arange(50,1050,50), 
     'criterion': ['gini', 'entropy'], 
     'max_depth': np.arange(1,7,1), 
     'min_samples_split': [1,2,3,4,5,10], 
     'min_samples_leaf': [1,2,3,4,5,10], 
     'max_features': ['auto', 'sqrt', 'log2'],
     'max_leaf_nodes': [None,1,2,3,4],
     'ccp_alpha': [0.0, 0.001, 0.015,0.035], 
     'bootstrap':[True, False], 
     'replacement': [True, False],
     'class_weight': ['balanced', 'balanced_subsample'], 
     'max_samples': [1.0,0.8,0.5,0.3]}
]

grid_search = RandomizedSearchCV(estimator = BalancedRandomForestClassifier(), 
                           param_distributions = params, cv = skf, 
                           scoring = 'f1', n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(X_train_pars_df, y_train_df)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# Gradient Boosting Classifier

In [None]:
#Gradient Boosting Classifier
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier

params = [
    {'loss': ['deviance', 'exponential'], 
     'learning_rate': [0.3, 0.2,0.15,0.1,0.05,0.01,0.005,0.001], 
     'n_estimators': [100,250,500,750,1000,1250,1500,1750],
     'min_samples_split': [2,4,6,8,10,20,40,60,100], 
     'min_samples_leaf': [1,3,5,7,9], 
     'max_depth':[3,4,5,6,7], 
     'max_features': ['sqrt', 'log2'], 
     'ccp_alpha': [0.0,0.015,0.035], 
     'subsample':[0.5,0.7,0.75,0.8,0.85,0.9,0.95,1]}
]

skf = StratifiedKFold(n_splits=5, random_state=seed, shuffle = True)

grid_search = RandomizedSearchCV(estimator = GradientBoostingClassifier(), 
                           param_distributions = params, cv = skf, 
                           scoring = 'accuracy', n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# Decision Tree

In [None]:
#Decision Tree
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

params = [{
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': np.arange(1,7,1),
    'min_samples_split': [2,3,10],
    'min_samples_leaf': [1,2,10],
    'max_features': ['auto', 'sqrt', 'log2'],
    'class_weight': ['balanced', None],
    'ccp_alpha': [0.0, 0.001, 0.015,0.035]
}]

skf = StratifiedKFold(n_splits=10, random_State=seed, shuffle = True)

grid_search = RandomizedSearchCV(estimator = DecisionTreeClassifier(), 
                           param_distributions = params, cv = skf, 
                           scoring = 'f1', n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# ExtraTree Classifier

In [None]:
#ExtraTree Classifier
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import ExtraTreesClassifier

params = [
    {'n_estimators': np.arange(50,1050,50), 
     'criterion': ['gini', 'entropy'],
     'max_depth': np.arange(1,7,1), 
     'min_samples_split':[2,3,10], 
     'min_samples_leaf': [1,2,10], 
     'max_features': ['auto', 'sqrt', 'log2'], 
     'bootstrap': [True, False], 
     'n_jobs': [-1], 
     'class_weight': ['balanced', 'balanced_subsample'], 
     'ccp_alpha':[0.0,0.01,0.015,0.035]}
]

grid_search = RandomizedSearchCV(estimator = ExtraTreesClassifier(), 
                           param_distributions = params, cv = skf, 
                           scoring = 'f1', n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# XGBOOST

In [None]:
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from xgboost import XGBClassifier

params = [
    {'booster': ['dart', 'gbtree', 'gblinear'], 
     'learning_rate': [0.15,0.1,0.05,0.01,0.005,0.001],
     'max_depth': [3, 4, 5, 6, 7],
     "min_child_weight" : [1, 3, 5, 7, 10, 15, 20],
     "gamma" : [ 0.0, 0.1, 0.2 , 0.3, 0.4, 1, 1.5, 2, 5],
     'subsample': [0.5, 0.6, 0.7, 0.8, 1.0],
     'colsample_bytree': [0.3, 0.4, 0.5 , 0.6, 0.7, 0.8, 1.0],
     'n_estimators' : [100, 200, 500,800,1000], 
     'base_score': [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65], 
     'max_delta_step': [0, 1, 2, 3, 5, 10],
     'reg_alpha': [0, 0.5, 1, 1.5, 2],
     'objective': ['binary:logistic']}
]

grid_search = RandomizedSearchCV(estimator = XGBClassifier(use_label_encoder=False, eval_metric = 'error'),
                                 param_distributions = params, 
                                 cv = skf, 
                                 scoring = 'f1', 
                                 n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# CatBoost

In [None]:
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from catboost import CatBoostClassifier

params = [{
    'depth':[1,2,3,4,5,6,7],
    'iterations':[250,100,500,1000],
    'learning_rate':[0.001,0.005,0.03,0.02,0.01,0.1,0.2,0.3], 
    'l2_leaf_reg':[3.0,1.0,5.0,10.0,100.0],
    'border_count':[32,5,10,20,50,100,200], 
    'boosting_type': ['Ordered', 'Plain']
}]

cat = CatBoostClassifier(verbose = False, loss_function='CrossEntropy', eval_metric='TotalF1')

grid_search = RandomizedSearchCV(estimator = cat, param_distributions = params, cv = skf, scoring = 'f1', 
                                 n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1

# Bagging Classifier

In [None]:
%%time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import BaggingClassifier

params = [{
    'n_estimators': np.arange(5,1005,5), 
    'max_samples': [1.0, 0.8, 0.5, 0.3], 
    'max_features': [1.0, 0.8, 0.5, 0.3], 
    'bootstrap' : [True, False], 
    'bootstrap_features' : [True, False], 
    'warm_start': [True, False], 
    'n_jobs': [-1]
}]

grid_search = RandomizedSearchCV(estimator = BaggingClassifier(), 
                                 param_distributions = params, cv = skf, scoring = 'f1', 
                                 n_jobs = -1) #To use all processors

tuning = 1

while tuning <=5:
    grid_search.fit(#X_train, y_train)
    best_accuracy = grid_search.best_score_
    best_param = grid_search.best_params_
    print(f'tuning: {tuning}')
    print(f'Best Accuracy: {best_accuracy*100:.4f} %')
    print(f'Best Parameters: {best_param}')
    print()
    tuning += 1