# **Optimization using optuna**

# **Ensemble Regressors**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from functools import partial
#!pip install optuna
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.neighbors import KNeighborsRegressor

In [None]:
def optimize_ensemble(trail, x, y):
  n_estimators = trail.suggest_int('n_estimators', 100, 1000)

  #criterion = trail.suggest_categorical('criterion', ['mse', 'mae'])

  max_depth =  trail.suggest_int('max_depth', 1, 70)

  min_samples_split = trail.suggest_int('min_samples_split', 1, 10)

  min_samples_leaf =  trail.suggest_int('min_samples_leaf', 1, 10)
  
  bootstrap = trail.suggest_categorical('bootstrap', [True, False])

  max_features = trail.suggest_uniform('max_features', 0.01, 1.0)

  ccp_alpha = trail.suggest_uniform('ccp_alpha', 0, 1.0)

  model = ExtraTreesRegressor(n_estimators= n_estimators, max_depth= max_depth, min_samples_leaf= min_samples_leaf,
                                min_samples_split= min_samples_split, bootstrap= bootstrap,
                                max_features= max_features, ccp_alpha = ccp_alpha)
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_ensemble, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 16:59:54,803][0m A new study created in memory with name: no-name-1596c178-c215-4be7-b678-9ed69d62d86f[0m
[32m[I 2021-08-19 16:59:59,703][0m Trial 0 finished with value: 3524.0690037506765 and parameters: {'n_estimators': 268, 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 7, 'bootstrap': True, 'max_features': 0.9216179559086601, 'ccp_alpha': 0.9866721747587908}. Best is trial 0 with value: 3524.0690037506765.[0m
[32m[I 2021-08-19 17:00:03,897][0m Trial 1 finished with value: 5175.516306359286 and parameters: {'n_estimators': 426, 'max_depth': 12, 'min_samples_split': 4, 'min_samples_leaf': 7, 'bootstrap': False, 'max_features': 0.014583938186168821, 'ccp_alpha': 0.3206492626718541}. Best is trial 0 with value: 3524.0690037506765.[0m
[32m[I 2021-08-19 17:00:09,895][0m Trial 2 finished with value: 2534.2861042017857 and parameters: {'n_estimators': 387, 'max_depth': 54, 'min_samples_split': 10, 'min_samples_leaf': 3, 'bootstrap': False, 'max_fea

# Linear Models Regression

In [None]:
def optimize_linear_models(trail, x, y):
  alpha = trail.suggest_uniform('alpha', 0, 100)
  model = ElasticNet(alpha = alpha)
  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_linear_models, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:01:38,670][0m A new study created in memory with name: no-name-06e022b2-6250-46f7-9d86-01f4f1b11254[0m
[32m[I 2021-08-19 17:01:38,739][0m Trial 0 finished with value: 9013.118497471603 and parameters: {'alpha': 73.54419566912779}. Best is trial 0 with value: 9013.118497471603.[0m
[32m[I 2021-08-19 17:01:38,804][0m Trial 1 finished with value: 8969.102571843898 and parameters: {'alpha': 29.922208812296102}. Best is trial 1 with value: 8969.102571843898.[0m
[32m[I 2021-08-19 17:01:38,868][0m Trial 2 finished with value: 8997.057328469837 and parameters: {'alpha': 47.41008812924724}. Best is trial 1 with value: 8969.102571843898.[0m
[32m[I 2021-08-19 17:01:38,934][0m Trial 3 finished with value: 8998.91196981003 and parameters: {'alpha': 49.365275328530146}. Best is trial 1 with value: 8969.102571843898.[0m
[32m[I 2021-08-19 17:01:38,996][0m Trial 4 finished with value: 9007.165046421624 and parameters: {'alpha': 60.72863945317578}. Best is trial 1 wi

# KNN Regressor

In [None]:
def optimize_knn(trail, x, y):
  n_neighbors = trail.suggest_int('n_neighbors', 1, 100)
  p = trail.suggest_categorical('p', [1, 2])
  model = KNeighborsRegressor(n_neighbors = n_neighbors, p = p)
  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_knn, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:01:39,754][0m A new study created in memory with name: no-name-e8975c41-de11-4195-bc80-9fe2d398d1c5[0m
[32m[I 2021-08-19 17:01:39,857][0m Trial 0 finished with value: 8854.250328714232 and parameters: {'n_neighbors': 59, 'p': 2}. Best is trial 0 with value: 8854.250328714232.[0m
[32m[I 2021-08-19 17:01:39,938][0m Trial 1 finished with value: 8720.939407521393 and parameters: {'n_neighbors': 36, 'p': 2}. Best is trial 1 with value: 8720.939407521393.[0m
[32m[I 2021-08-19 17:01:40,019][0m Trial 2 finished with value: 7434.1398587811245 and parameters: {'n_neighbors': 26, 'p': 1}. Best is trial 2 with value: 7434.1398587811245.[0m
[32m[I 2021-08-19 17:01:40,109][0m Trial 3 finished with value: 8741.293710153197 and parameters: {'n_neighbors': 38, 'p': 2}. Best is trial 2 with value: 7434.1398587811245.[0m
[32m[I 2021-08-19 17:01:40,209][0m Trial 4 finished with value: 8145.55973549403 and parameters: {'n_neighbors': 100, 'p': 1}. Best is trial 2 with 

# SVR

In [None]:
from sklearn.svm import SVR
def optimize_svr(trail, x, y):
  kernel = trail.suggest_categorical('kernel', ['poly', 'rbf'])
  C = trail.suggest_uniform('C', 1.0, 10.0)
  epsilon = trail.suggest_uniform('epsilon', 0.1, 2)
  degree = trail.suggest_int('degree', 2, 10)
  model = SVR(kernel = kernel, C = C, epsilon = epsilon, degree = degree)
  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_svr, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:01:41,107][0m A new study created in memory with name: no-name-da14dba9-b571-4c68-8975-d56378a99b47[0m
[32m[I 2021-08-19 17:01:42,061][0m Trial 0 finished with value: 7932.219739745054 and parameters: {'kernel': 'rbf', 'C': 9.703405283601066, 'epsilon': 0.8246734910060872, 'degree': 8}. Best is trial 0 with value: 7932.219739745054.[0m
[32m[I 2021-08-19 17:01:42,816][0m Trial 1 finished with value: 7030.397010344338 and parameters: {'kernel': 'poly', 'C': 9.242578950302551, 'epsilon': 0.9267598178687689, 'degree': 5}. Best is trial 1 with value: 7030.397010344338.[0m
[32m[I 2021-08-19 17:01:43,727][0m Trial 2 finished with value: 8145.806110887793 and parameters: {'kernel': 'rbf', 'C': 4.763346577715986, 'epsilon': 0.4948364423740502, 'degree': 2}. Best is trial 1 with value: 7030.397010344338.[0m
[32m[I 2021-08-19 17:01:44,619][0m Trial 3 finished with value: 8233.278187781962 and parameters: {'kernel': 'rbf', 'C': 2.7497389765360105, 'epsilon': 1.42

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor
def optimize_dtr(trail, x, y):
  criterion = trail.suggest_categorical('criterion', ['mse', 'friedman_mse'])
  splitter = trail.suggest_categorical('splitter', ['best', 'random'])
  max_depth = trail.suggest_int('max_depth', 1, 80)
  min_samples_split = trail.suggest_int('min_samples_split', 1, 10)
  min_samples_leaf =  trail.suggest_int('min_samples_leaf', 1, 10)
  ccp_alpha = trail.suggest_uniform('ccp_alpha', 0, 1.0)
  model = DecisionTreeRegressor(criterion = criterion,
                                splitter = splitter,
                                max_depth = max_depth,
                                min_samples_leaf = min_samples_leaf,
                                min_samples_split = min_samples_split,
                                ccp_alpha = ccp_alpha)

  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_dtr, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:01:53,040][0m A new study created in memory with name: no-name-843c9d57-dc0e-420b-a1ec-b64557859733[0m
[32m[I 2021-08-19 17:01:53,107][0m Trial 0 finished with value: 2874.7313831053148 and parameters: {'criterion': 'friedman_mse', 'splitter': 'best', 'max_depth': 3, 'min_samples_split': 6, 'min_samples_leaf': 10, 'ccp_alpha': 0.11978324103274718}. Best is trial 0 with value: 2874.7313831053148.[0m
[32m[I 2021-08-19 17:01:53,179][0m Trial 1 finished with value: 2721.02009936493 and parameters: {'criterion': 'mse', 'splitter': 'best', 'max_depth': 37, 'min_samples_split': 6, 'min_samples_leaf': 8, 'ccp_alpha': 0.4998785196476918}. Best is trial 1 with value: 2721.02009936493.[0m
[32m[I 2021-08-19 17:01:53,251][0m Trial 2 finished with value: 2702.840007286254 and parameters: {'criterion': 'friedman_mse', 'splitter': 'best', 'max_depth': 44, 'min_samples_split': 7, 'min_samples_leaf': 9, 'ccp_alpha': 0.2903147982773526}. Best is trial 2 with value: 2702.84

# XGBoost Regressor

In [None]:
#!pip install xgboost
from xgboost import XGBRegressor
def optimize_xgbr(trail, x, y):
  n_estimators = trail.suggest_int('n_estimators', 100, 1000)
  max_depth =  trail.suggest_int('max_depth', 1, 70)
  learning_rate = trail.suggest_uniform('learning_rate', 0.01, 1)
  gamma  = trail.suggest_uniform('gamma', 0, 1)
  min_child_weight = trail.suggest_int('min_child_weight', 1, 10)
  model = XGBRegressor(n_estimators = n_estimators,
                       max_depth = max_depth,
                       learning_rate = learning_rate,
                       gamma = gamma,
                       min_child_weight = min_child_weight
                       )

  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_xgbr, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:01:54,341][0m A new study created in memory with name: no-name-f29dcf5e-faee-4a8c-9e6f-9230d175c2e2[0m
[32m[I 2021-08-19 17:02:14,204][0m Trial 0 finished with value: 2915.884552902041 and parameters: {'n_estimators': 356, 'max_depth': 43, 'learning_rate': 0.5303392743935041, 'gamma': 0.7357304629696291, 'min_child_weight': 1}. Best is trial 0 with value: 2915.884552902041.[0m
[32m[I 2021-08-19 17:02:25,500][0m Trial 1 finished with value: 2984.771855231 and parameters: {'n_estimators': 556, 'max_depth': 15, 'learning_rate': 0.193706528820632, 'gamma': 0.7452782424712598, 'min_child_weight': 3}. Best is trial 0 with value: 2915.884552902041.[0m
[32m[I 2021-08-19 17:02:27,161][0m Trial 2 finished with value: 3736.10287305119 and parameters: {'n_estimators': 281, 'max_depth': 4, 'learning_rate': 0.6393706520929532, 'gamma': 0.1265596153040175, 'min_child_weight': 7}. Best is trial 0 with value: 2915.884552902041.[0m
[32m[I 2021-08-19 17:02:31,177][0m Tr

# LGBRegressor

In [None]:
#!pip install lightgbm
from lightgbm import LGBMRegressor
def optimize_lgbr(trail, x, y):
  n_estimators = trail.suggest_int('n_estimators', 100, 1000)
  max_depth =  trail.suggest_int('max_depth', -1, 70)
  learning_rate = trail.suggest_uniform('learning_rate', 0.01, 1)
  gamma  = trail.suggest_uniform('gamma', 0, 1)
  min_child_weight = trail.suggest_uniform('min_child_weight', 0.001, 3)
  model = LGBMRegressor(n_estimators = n_estimators,
                       max_depth = max_depth,
                       learning_rate = learning_rate,
                       gamma = gamma,
                       min_child_weight = min_child_weight
                       )

  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_lgbr, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:07:54,419][0m A new study created in memory with name: no-name-4fc0d239-3641-4c04-9ca4-61064b3c3a83[0m
[32m[I 2021-08-19 17:07:56,273][0m Trial 0 finished with value: 3898.765362899795 and parameters: {'n_estimators': 375, 'max_depth': 11, 'learning_rate': 0.6011957170088678, 'gamma': 0.7168788403589922, 'min_child_weight': 1.953771109871419}. Best is trial 0 with value: 3898.765362899795.[0m
[32m[I 2021-08-19 17:08:00,570][0m Trial 1 finished with value: 3645.1954757270964 and parameters: {'n_estimators': 743, 'max_depth': 46, 'learning_rate': 0.1711036859984021, 'gamma': 0.03637208795378044, 'min_child_weight': 1.9331635776778364}. Best is trial 1 with value: 3645.1954757270964.[0m
[32m[I 2021-08-19 17:08:01,713][0m Trial 2 finished with value: 3436.2913137673336 and parameters: {'n_estimators': 232, 'max_depth': 62, 'learning_rate': 0.2370844177477266, 'gamma': 0.288084739854477, 'min_child_weight': 1.864290275165581}. Best is trial 2 with value: 3436

In [None]:
#!pip install catboost
from catboost import CatBoostRegressor

def optimize_cbr(trail, x, y):
  n_estimators = trail.suggest_int('n_estimators', 100, 1000)
  depth =  trail.suggest_int('depth', 0, 10)
  learning_rate = trail.suggest_uniform('learning_rate', 0.01, 1)
  min_child_samples = trail.suggest_int(' min_child_samples', 0, 10)
  model = CatBoostRegressor(n_estimators = n_estimators,
                       depth = depth,
                       learning_rate = learning_rate,
                       min_child_samples = min_child_samples
                       )

  
  return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'neg_mean_absolute_error', n_jobs = -1))
  # kf = StratifiedKFold(n_splits= 10)
  # mean_error = []
  # for idx in kf.split(x, y):
  #   train_idx, test_idx = idx[0], idx[1]
  #   x_train = x.iloc[train_idx]
  #   y_train = y[train_idx]
  #   x_test = x.iloc[test_idx]
  #   y_test = y[test_idx]
  #   model.fit(x_train, y_train)
  #   pred = model.predict(x_test)
  #   result = mean_absolute_error(y_test, pred)
  #   mean_error.append(pred)
  #   return -np.mean(mean_error)

In [None]:
optimization_function = partial(optimize_cbr, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-19 17:20:39,910][0m A new study created in memory with name: no-name-4b6532bb-0280-401c-9619-c608fdef2336[0m
[32m[I 2021-08-19 17:21:30,028][0m Trial 0 finished with value: 3351.488076862411 and parameters: {'n_estimators': 685, 'depth': 10, 'learning_rate': 0.7921844052417468, ' min_child_samples': 10}. Best is trial 0 with value: 3351.488076862411.[0m
[32m[I 2021-08-19 17:21:41,744][0m Trial 1 finished with value: 3211.360637065467 and parameters: {'n_estimators': 690, 'depth': 7, 'learning_rate': 0.4459437291346898, ' min_child_samples': 7}. Best is trial 1 with value: 3211.360637065467.[0m
[32m[I 2021-08-19 17:21:47,769][0m Trial 2 finished with value: 2929.2196838402906 and parameters: {'n_estimators': 866, 'depth': 4, 'learning_rate': 0.2134703706112012, ' min_child_samples': 0}. Best is trial 2 with value: 2929.2196838402906.[0m
[32m[I 2021-08-19 17:21:50,701][0m Trial 3 finished with value: 2544.468457952672 and parameters: {'n_estimators': 490, 'de

# **Classification Algorithms Optimization**

In [43]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
#!pip install optuna
import optuna
iris = load_iris()

In [44]:
x_train= pd.DataFrame(data = iris.data, columns = iris.feature_names)
y_train = iris.target
x_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


# Ensemble Classification

In [45]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import StratifiedKFold
from functools import partial
def optimize_ensemble_classification(trail, x, y):
  n_estimators = trail.suggest_int('n_estimators', 100, 1000)

  criterion = trail.suggest_categorical('criterion', ['gini', 'entropy'])

  class_weight = trail.suggest_categorical('class_weight', ['balanced', 'balanced_subsample'])


  max_depth =  trail.suggest_int('max_depth', 1, 70)

  min_samples_split = trail.suggest_int('min_samples_split', 3, 10)

  min_samples_leaf =  trail.suggest_int('min_samples_leaf', 1, 10)
  
  bootstrap = trail.suggest_categorical('bootstrap', [True, False])

  max_features = trail.suggest_uniform('max_features', 0.01, 1.0)

  ccp_alpha = trail.suggest_uniform('ccp_alpha', 0, 1.0)

  model = ExtraTreesClassifier(n_estimators= n_estimators, max_depth= max_depth, min_samples_leaf= min_samples_leaf,
                                min_samples_split= min_samples_split, bootstrap= bootstrap, criterion = criterion, class_weight = class_weight,
                                max_features= max_features, ccp_alpha = ccp_alpha)
  #return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'roc_auc', n_jobs = -1))
  kf = StratifiedKFold(n_splits= 10)
  mean_metric = []
  for idx in kf.split(x, y):
    train_idx, test_idx = idx[0], idx[1]
    x_train = x.iloc[train_idx]
    y_train = y[train_idx]
    x_test = x.iloc[test_idx]
    y_test = y[test_idx]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    result = accuracy_score(y_test, pred)
    mean_metric.append(pred)
    return -np.mean(mean_metric)

In [46]:
optimization_function = partial(optimize_ensemble_classification, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-22 12:17:25,622][0m A new study created in memory with name: no-name-bcc9636a-6aa6-440f-b9a9-c03cac628787[0m
[32m[I 2021-08-22 12:17:25,773][0m Trial 0 finished with value: -0.0 and parameters: {'n_estimators': 301, 'criterion': 'gini', 'class_weight': 'balanced', 'max_depth': 45, 'min_samples_split': 6, 'min_samples_leaf': 7, 'bootstrap': False, 'max_features': 0.25795107800368994, 'ccp_alpha': 0.8104200411031433}. Best is trial 0 with value: -0.0.[0m
[32m[I 2021-08-22 12:17:25,890][0m Trial 1 finished with value: -0.6666666666666666 and parameters: {'n_estimators': 228, 'criterion': 'entropy', 'class_weight': 'balanced', 'max_depth': 21, 'min_samples_split': 8, 'min_samples_leaf': 1, 'bootstrap': False, 'max_features': 0.7157835600098752, 'ccp_alpha': 0.8910820574153876}. Best is trial 1 with value: -0.6666666666666666.[0m
[32m[I 2021-08-22 12:17:26,453][0m Trial 2 finished with value: -1.0 and parameters: {'n_estimators': 755, 'criterion': 'entropy', 'class

# Logistic Regression

In [47]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
def optimize_logistic(trail, x, y):
  penalty = trail.suggest_categorical("penalty", ["l1", "l2"])
  C = trail.suggest_uniform('C', 0.001, 100)
  class_weight = trail.suggest_categorical('class_weight', ['balanced', None])
  solver = trail.suggest_categorical('solver', ['liblinear', 'saga'])
  model = LogisticRegression(penalty = penalty, C = C, class_weight = class_weight, solver = solver)
  
  #return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'roc_auc', n_jobs = -1))
  kf = StratifiedKFold(n_splits= 10)
  mean_metric = []
  for idx in kf.split(x, y):
    train_idx, test_idx = idx[0], idx[1]
    x_train = x.iloc[train_idx]
    y_train = y[train_idx]
    x_test = x.iloc[test_idx]
    y_test = y[test_idx]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    result = accuracy_score(y_test, pred)
    mean_metric.append(pred)
    return -np.mean(mean_metric)

In [48]:
optimization_function = partial(optimize_logistic, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-22 12:17:30,981][0m A new study created in memory with name: no-name-9f4041c5-7e76-473d-9502-f802048b4a01[0m
[32m[I 2021-08-22 12:17:30,989][0m Trial 0 finished with value: -1.0 and parameters: {'penalty': 'l1', 'C': 61.55289049145759, 'class_weight': 'balanced', 'solver': 'saga'}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:30,995][0m Trial 1 finished with value: -1.0 and parameters: {'penalty': 'l1', 'C': 42.89175670243155, 'class_weight': 'balanced', 'solver': 'saga'}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:31,002][0m Trial 2 finished with value: -1.0 and parameters: {'penalty': 'l1', 'C': 33.687538302150394, 'class_weight': 'balanced', 'solver': 'saga'}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:31,008][0m Trial 3 finished with value: -1.0 and parameters: {'penalty': 'l2', 'C': 60.775941030920904, 'class_weight': None, 'solver': 'saga'}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12

# KNN Classifier

In [49]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
def optimize_knn_classifier(trail, x, y):
  n_neighbors = trail.suggest_int('n_neighbors', 3, 100)
  p = trail.suggest_categorical('p', [2, 3])
  weights = trail.suggest_categorical('weights', ['uniform', 'distance'])
  model = KNeighborsClassifier(n_neighbors = n_neighbors, p = p, weights = weights)

  
  #return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'roc_auc', n_jobs = -1))
  kf = StratifiedKFold(n_splits= 10)
  mean_metric = []
  for idx in kf.split(x, y):
    train_idx, test_idx = idx[0], idx[1]
    x_train = x.iloc[train_idx]
    y_train = y[train_idx]
    x_test = x.iloc[test_idx]
    y_test = y[test_idx]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    result = accuracy_score(y_test, pred)
    mean_metric.append(pred)
    return -np.mean(mean_metric)

In [50]:
optimization_function = partial(optimize_knn_classifier, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-22 12:17:31,125][0m A new study created in memory with name: no-name-b809b696-11a8-4c97-bb2c-7bc9e0d2343b[0m
[32m[I 2021-08-22 12:17:31,131][0m Trial 0 finished with value: -1.0666666666666667 and parameters: {'n_neighbors': 72, 'p': 3, 'weights': 'uniform'}. Best is trial 0 with value: -1.0666666666666667.[0m
[32m[I 2021-08-22 12:17:31,136][0m Trial 1 finished with value: -1.0 and parameters: {'n_neighbors': 6, 'p': 2, 'weights': 'distance'}. Best is trial 0 with value: -1.0666666666666667.[0m
[32m[I 2021-08-22 12:17:31,139][0m Trial 2 finished with value: -1.0 and parameters: {'n_neighbors': 80, 'p': 3, 'weights': 'distance'}. Best is trial 0 with value: -1.0666666666666667.[0m
[32m[I 2021-08-22 12:17:31,145][0m Trial 3 finished with value: -0.6666666666666666 and parameters: {'n_neighbors': 97, 'p': 3, 'weights': 'uniform'}. Best is trial 0 with value: -1.0666666666666667.[0m
[32m[I 2021-08-22 12:17:31,149][0m Trial 4 finished with value: -0.933333333

# SVC

In [51]:
from sklearn.svm import SVC
def optimize_svc(trail, x, y):
  kernel = trail.suggest_categorical('kernel', ['poly', 'rbf'])
  C = trail.suggest_uniform('C', 1.0, 10.0)
  degree = trail.suggest_int('degree', 2, 10)
  class_weight= trail.suggest_categorical('class_weight', ['balanced', None])
  model = SVC(kernel = kernel, C = C, degree = degree, class_weight= class_weight)
  
  #return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'roc_auc', n_jobs = -1))
  kf = StratifiedKFold(n_splits= 10)
  mean_metric = []
  for idx in kf.split(x, y):
    train_idx, test_idx = idx[0], idx[1]
    x_train = x.iloc[train_idx]
    y_train = y[train_idx]
    x_test = x.iloc[test_idx]
    y_test = y[test_idx]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    result = accuracy_score(y_test, pred)
    mean_metric.append(pred)
    return -np.mean(mean_metric)

In [52]:
optimization_function = partial(optimize_svc, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-22 12:17:31,215][0m A new study created in memory with name: no-name-7bfaa3a6-6b5f-49d7-8435-0302fe368880[0m
[32m[I 2021-08-22 12:17:31,780][0m Trial 0 finished with value: -1.0 and parameters: {'kernel': 'poly', 'C': 3.675934766696848, 'degree': 9, 'class_weight': None}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:31,784][0m Trial 1 finished with value: -1.0 and parameters: {'kernel': 'rbf', 'C': 9.408716030641681, 'degree': 5, 'class_weight': None}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:32,663][0m Trial 2 finished with value: -0.9333333333333333 and parameters: {'kernel': 'poly', 'C': 1.1445592190876739, 'degree': 10, 'class_weight': 'balanced'}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:32,687][0m Trial 3 finished with value: -1.0 and parameters: {'kernel': 'poly', 'C': 5.111687128986247, 'degree': 7, 'class_weight': None}. Best is trial 0 with value: -1.0.[0m
[32m[I 2021-08-22 12:17:32,693][0

# Decision Tree Classifier

In [61]:
from sklearn.tree import DecisionTreeClassifier
def optimize_dtc(trail, x, y):
  criterion = trail.suggest_categorical('criterion', ['gini', 'entropy'])
  splitter = trail.suggest_categorical('splitter', ['best', 'random'])
  max_depth = trail.suggest_int('max_depth', 1, 80)
  min_samples_split = trail.suggest_int('min_samples_split', 3, 10)
  min_samples_leaf =  trail.suggest_int('min_samples_leaf', 3, 10)
  ccp_alpha = trail.suggest_uniform('ccp_alpha', 0, 1.0)
  #class_weight = trial.suggest_categorical('class_weight', ['balanced', None])
  model = DecisionTreeClassifier(criterion = criterion,
                                splitter = splitter,
                                max_depth = max_depth,
                                min_samples_leaf = min_samples_leaf,
                                min_samples_split = min_samples_split,
                                ccp_alpha = ccp_alpha)
                                #class_weight = class_weight)

  
  #return - np.mean(cross_val_score(model, x, y, cv = 10, scoring = 'roc_auc', n_jobs = -1))
  kf = StratifiedKFold(n_splits= 10)
  mean_metric = []
  for idx in kf.split(x, y):
    train_idx, test_idx = idx[0], idx[1]
    x_train = x.iloc[train_idx]
    y_train = y[train_idx]
    x_test = x.iloc[test_idx]
    y_test = y[test_idx]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    result = accuracy_score(y_test, pred)
    mean_metric.append(pred)
    return -np.mean(mean_metric)

In [62]:
optimization_function = partial(optimize_dtc, x = x_train, y = y_train)
study = optuna.create_study(direction= 'minimize')
study.optimize(optimization_function, n_trials=15)

[32m[I 2021-08-22 12:20:28,193][0m A new study created in memory with name: no-name-8f384564-78d4-4c8c-83d0-cf344c0a9add[0m
[32m[I 2021-08-22 12:20:28,197][0m Trial 0 finished with value: -0.0 and parameters: {'criterion': 'entropy', 'splitter': 'random', 'max_depth': 60, 'min_samples_split': 5, 'min_samples_leaf': 5, 'ccp_alpha': 0.4523734691953033}. Best is trial 0 with value: -0.0.[0m
[32m[I 2021-08-22 12:20:28,201][0m Trial 1 finished with value: -0.0 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 46, 'min_samples_split': 4, 'min_samples_leaf': 5, 'ccp_alpha': 0.6936262272491126}. Best is trial 0 with value: -0.0.[0m
[32m[I 2021-08-22 12:20:28,205][0m Trial 2 finished with value: -0.6666666666666666 and parameters: {'criterion': 'entropy', 'splitter': 'random', 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 9, 'ccp_alpha': 0.7480826241276173}. Best is trial 2 with value: -0.6666666666666666.[0m
[32m[I 2021-08-22 12:20:28,209][0m T