In [1]:
# imporing all the necessary modules 
import pandas as pd
import sklearn
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings("ignore")

In [2]:
with open('/kaggle/input/vgg19-isic-17/X_train_vgg19.txt', 'r') as f:
    X_train = np.loadtxt(f)

with open('/kaggle/input/vgg16-isic-17/y_train_vgg.txt', 'r') as f:
    y_train = np.loadtxt(f)
    
with open('/kaggle/input/vgg19-isic-17/X_test_vgg19.txt', 'r') as f:
    X_test = np.loadtxt(f)

with open('/kaggle/input/vgg16-isic-17/y_test_vgg.txt', 'r') as f:
    y_test = np.loadtxt(f)

In [3]:
with open('/kaggle/input/eb0-3-featurevectors/X_train_b0_3.txt', 'r') as f:
    X_train_xg = np.loadtxt(f)

with open('/kaggle/input/new-sc-feature-vectors/y_train.txt', 'r') as f:
    y_train_xg = np.loadtxt(f)
    
with open('/kaggle/input/eb0-3-featurevectors/X_test_b0_3.txt', 'r') as f:
    X_test_xg = np.loadtxt(f)

with open('/kaggle/input/new-sc-feature-vectors/y_test.txt', 'r') as f:
    y_test_xg = np.loadtxt(f)

In [4]:
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix, precision_score, f1_score, roc_auc_score

def eval_clf(y_test, y_pred):
    print(f'Accuracy : {accuracy_score(y_test, y_pred)}')
    print('-----------------------------')
    print(f'Sensitivity/Recall: {recall_score(y_test, y_pred)}')
    print('-----------------------------')
    print(f'Specificity: {recall_score(y_test, y_pred, pos_label=0)}')
    print('-----------------------------')
    print(f'Precision : {precision_score(y_test, y_pred)}')
    print('-----------------------------')
    print(f'ROC:\n{roc_auc_score(y_test, y_pred)}')    
    print('-----------------------------')
    print(f'Confusion_Matrix:\n{confusion_matrix(y_test, y_pred)}')

## Preprocessing

### Standardization

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
len(X_train), len(y_train), len(X_test), len(y_test)

(3870, 3870, 600, 600)

In [6]:
sc = StandardScaler()
X_train_xf1 = sc.fit_transform(X_train_xg)
X_test_xf1 = sc.transform(X_test_xg)

## Import Libraries

In [7]:
import xgboost as xgb

In [8]:
import sys
!cp ../input/rapids/rapids.21.06 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.6/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.6"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

## Trial 1

In [9]:
import optuna
from cuml.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
# import xgboost as xgb
from sklearn.ensemble import VotingClassifier

In [10]:
def objective(trial):
    # Define the hyperparameters to be optimized for each classifier
    rf_params = {
        "n_estimators": trial.suggest_int("n_estimators_1", 50, 500, 11),
        "max_depth": trial.suggest_int("max_depth_1", 2, 60),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 60),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 60),
        "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2"]),
        "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
        'split_criterion':trial.suggest_categorical('split_criterion', ['gini', 'entropy'])
    }
    
    knn_params = {
        'n_neighbors': trial.suggest_categorical('knn_n_neighbors', [1, 3 ,5]),
        'weights': trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
        'metric': trial.suggest_categorical('metric', ['minkowski','euclidean','manhattan'])
    }
    
    xgb_params = {
        'max_depth': trial.suggest_int('max_depth', 2, 25),
        'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1.0, 0.05),
        'n_estimators': trial.suggest_int('n_estimators', 50, 700, 20),
        'eta': trial.suggest_discrete_uniform('eta', 0.01, 0.1, 0.01),
        'reg_alpha': trial.suggest_int('reg_alpha', 1, 50),
        'gamma':trial.suggest_uniform('gamma', 0, 10),
        'reg_lambda': trial.suggest_int('reg_lambda', 5, 100),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 20),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
        'nthread' : -1    

    }
    # Create the classifiers
    rf = RandomForestClassifier(**rf_params)
    xgbclf = xgb.XGBClassifier(random_state=42, 
                             tree_method='gpu_hist', 
                             gpu_id=0, 
                             predictor="gpu_predictor"
                             ,**xgb_params )  
    knn = KNeighborsClassifier(**knn_params)

    # Create the ensemble classifier
    estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
    ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

    # Train and evaluate the ensemble classifier
    ensemble.fit(X_train, y_train)
    y_pred = ensemble.predict(X_test)
    prec = precision_score(y_test, y_pred)
#     recall = recall_score(y_test, y_pred)

    # Return the evaluation metric
    return prec

In [11]:
study_1 = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study_1.optimize(objective, n_trials=1000, show_progress_bar = True)

[32m[I 2023-04-05 08:46:28,543][0m A new study created in memory with name: no-name-d7242cab-cc31-46aa-b6ad-2d2c56e3b562[0m


  0%|          | 0/1000 [00:00<?, ?it/s]

[32m[I 2023-04-05 08:46:49,602][0m Trial 0 finished with value: 0.3370165745856354 and parameters: {'n_estimators_1': 50, 'max_depth_1': 12, 'min_samples_split': 56, 'min_samples_leaf': 2, 'max_features': 'auto', 'bootstrap': False, 'split_criterion': 'gini', 'knn_n_neighbors': 5, 'knn_weights': 'distance', 'metric': 'euclidean', 'max_depth': 23, 'subsample': 0.7, 'n_estimators': 150, 'eta': 0.06999999999999999, 'reg_alpha': 33, 'gamma': 8.862281419378352, 'reg_lambda': 27, 'min_child_weight': 10, 'colsample_bytree': 0.27808315580007065}. Best is trial 0 with value: 0.3370165745856354.[0m
[32m[I 2023-04-05 08:46:53,456][0m Trial 1 finished with value: 0.3128205128205128 and parameters: {'n_estimators_1': 369, 'max_depth_1': 9, 'min_samples_split': 24, 'min_samples_leaf': 58, 'max_features': 'sqrt', 'bootstrap': True, 'split_criterion': 'gini', 'knn_n_neighbors': 3, 'knn_weights': 'distance', 'metric': 'euclidean', 'max_depth': 6, 'subsample': 0.85, 'n_estimators': 490, 'eta': 0.01

In [12]:
optuna.visualization.plot_slice(study_1)

In [13]:
optuna.visualization.plot_param_importances(study_1)

In [14]:
optuna.visualization.plot_optimization_history(study_1)

In [15]:
study_1.best_params

{'n_estimators_1': 358,
 'max_depth_1': 15,
 'min_samples_split': 14,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'bootstrap': False,
 'split_criterion': 'gini',
 'knn_n_neighbors': 1,
 'knn_weights': 'uniform',
 'metric': 'manhattan',
 'max_depth': 11,
 'subsample': 0.9,
 'n_estimators': 570,
 'eta': 0.03,
 'reg_alpha': 2,
 'gamma': 0.34179757647587583,
 'reg_lambda': 9,
 'min_child_weight': 16,
 'colsample_bytree': 0.53859494875026}

## Evaluation

In [16]:
all_rf_st_1 = {'n_estimators': 358,
 'max_depth': 15,
 'min_samples_split': 14,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'bootstrap': False,
 'split_criterion': 'gini',}

all_knn_st_1 = { 'n_neighbors': 1,
 'weights': 'uniform',
 'metric': 'manhattan'}

all_xgb_st_1 = {'max_depth': 11,
 'subsample': 0.9,
 'n_estimators': 570,
 'eta': 0.03,
 'reg_alpha': 2,
 'gamma': 0.34179757647587583,
 'reg_lambda': 9,
 'min_child_weight': 16,
 'colsample_bytree': 0.53859494875026}

all_xgb_f1 = {'max_depth': 17,
 'subsample': 0.9,
 'n_estimators': 95,
 'eta': 0.07600000000000001,
 'reg_alpha': 1,
 'gamma': 2.8591358197630745,
 'reg_lambda': 8,
 'min_child_weight': 9,
 'colsample_bytree': 0.4604302286209221}

In [17]:
rf = RandomForestClassifier(**all_rf_st_1)
xgbclf = xgb.XGBClassifier(random_state=42, 
                         tree_method='gpu_hist', 
                         gpu_id=0, 
                         predictor="gpu_predictor"
                         ,**all_xgb_st_1)  
knn = KNeighborsClassifier(**all_knn_st_1)

# Create the ensemble classifier
estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

# Train and evaluate the ensemble classifier
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
eval_clf(y_test, y_pred)

Accuracy : 0.805
-----------------------------
Sensitivity/Recall: 0.27350427350427353
-----------------------------
Specificity: 0.9337474120082816
-----------------------------
Precision : 0.5
-----------------------------
ROC:
0.6036258427562775
-----------------------------
Confusion_Matrix:
[[451  32]
 [ 85  32]]


In [18]:
from sklearn.model_selection import train_test_split
X_train_xg, __, y_train_xg, __  = train_test_split(X_train_xg, y_train_xg ,test_size=0.25,random_state=42, stratify = y_train_xg)

In [19]:
model = xgb.XGBClassifier(random_state=42, 
                             tree_method='gpu_hist', 
                             gpu_id=0, 
                             predictor="gpu_predictor"
                             ,**all_xgb_f1)
model.fit(X_train_xg, y_train_xg)
preds = model.predict(X_test_xg)
eval_clf(y_test, preds)

Accuracy : 0.7916666666666666
-----------------------------
Sensitivity/Recall: 0.6239316239316239
-----------------------------
Specificity: 0.8322981366459627
-----------------------------
Precision : 0.474025974025974
-----------------------------
ROC:
0.7281148802887933
-----------------------------
Confusion_Matrix:
[[402  81]
 [ 44  73]]


In [20]:
from sklearn.ensemble import VotingClassifier
class HierarchicalClassifier:
    def __init__(self, lv1_clf, clf2, clf3, clf4):
        self.lv1_clf = lv1_clf
        self.lv2_clf = VotingClassifier(estimators=[('clf2', clf2), ('clf3', clf3), ('clf4', clf4)], voting='hard')
    
    def fit(self, X_train_xg, y_train_xg, X_train, y_train):
        self.lv1_clf.fit(X_train_xg, y_train_xg)
        self.lv2_clf.fit(X_train, y_train)
    
    def predict(self, X_test, X_test_xg):
        y_pred = []
        for i in range(len(X_test)):
            pred1 = self.lv1_clf.predict(np.array([X_test_xg[i]]))
            if pred1 != 1:
                pred = self.lv2_clf.predict(np.array([X_test[i]]))
            else:
                pred = pred1
            y_pred.append(pred)
        return y_pred

In [21]:
from sklearn.ensemble import AdaBoostClassifier
clf1 = xgb.XGBClassifier(**all_xgb_f1, random_state = 42, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf2 = KNeighborsClassifier(**all_knn_st_1)
clf3 = xgb.XGBClassifier(random_state = 42, **all_xgb_st_1, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf4 = RandomForestClassifier(**all_rf_st_1, random_state = 42)
hclf = HierarchicalClassifier(lv1_clf=clf1, clf2=clf2, clf3=clf3, clf4=clf4)
hclf.fit(X_train_xg, y_train_xg, X_train, y_train)
pred = hclf.predict(X_test, X_test_xf1)
eval_clf(y_test, pred)

Accuracy : 0.8116666666666666
-----------------------------
Sensitivity/Recall: 0.4188034188034188
-----------------------------
Specificity: 0.906832298136646
-----------------------------
Precision : 0.5212765957446809
-----------------------------
ROC:
0.6628178584700324
-----------------------------
Confusion_Matrix:
[[438  45]
 [ 68  49]]


## Trial 2

In [22]:
def objective(trial):
    # Define the hyperparameters to be optimized for each classifier
    rf_params = {
        "n_estimators": trial.suggest_int("n_estimators_1", 259, 460, 6),
        "max_depth": trial.suggest_int("max_depth_1", 8, 25),
        "min_samples_split": trial.suggest_int("min_samples_split", 8, 22),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 12),
        "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2"]),
        "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
        'split_criterion':trial.suggest_categorical('split_criterion', ['gini', 'entropy'])
    }
    
    knn_params = {
        'n_neighbors': trial.suggest_categorical('knn_n_neighbors', [1, 3 ,5]),
        'weights': trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
        'metric': trial.suggest_categorical('metric', ['minkowski','euclidean','manhattan'])
    }
    
    xgb_params = {
        'max_depth': trial.suggest_int('max_depth', 8, 18),
        'subsample': trial.suggest_discrete_uniform('subsample', 0.3, 1.0, 0.05),
        'n_estimators': trial.suggest_int('n_estimators', 370, 620, 7),
        'eta': trial.suggest_discrete_uniform('eta', 0.01, 0.06, 0.005),
        'reg_alpha': trial.suggest_float('reg_alpha', 1, 6),
        'gamma':trial.suggest_uniform('gamma', 0, 3),
        'reg_lambda': trial.suggest_float('reg_lambda', 1, 16),
        'min_child_weight': trial.suggest_int('min_child_weight', 10, 26),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.38, 0.92),
        'nthread' : -1    

    }
    # Create the classifiers
    rf = RandomForestClassifier(**rf_params)
    xgbclf = xgb.XGBClassifier(random_state=42, 
                             tree_method='gpu_hist', 
                             gpu_id=0, 
                             predictor="gpu_predictor"
                             ,**xgb_params )  
    knn = KNeighborsClassifier(**knn_params)

    # Create the ensemble classifier
    estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
    ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

    # Train and evaluate the ensemble classifier
    ensemble.fit(X_train, y_train)
    y_pred = ensemble.predict(X_test)
    prec = precision_score(y_test, y_pred)
#     recall = recall_score(y_test, y_pred)

    # Return the evaluation metric
    return prec

In [23]:
study_2 = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study_2.optimize(objective, n_trials=1000, show_progress_bar = True)

[32m[I 2023-04-05 11:49:04,177][0m A new study created in memory with name: no-name-353da2c9-0a6a-4a65-a0e6-57532fff8202[0m


  0%|          | 0/1000 [00:00<?, ?it/s]

[32m[I 2023-04-05 11:49:08,604][0m Trial 0 finished with value: 0.4418604651162791 and parameters: {'n_estimators_1': 427, 'max_depth_1': 18, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False, 'split_criterion': 'gini', 'knn_n_neighbors': 1, 'knn_weights': 'uniform', 'metric': 'minkowski', 'max_depth': 10, 'subsample': 0.95, 'n_estimators': 489, 'eta': 0.03, 'reg_alpha': 4.482273784850107, 'gamma': 2.2126797473718636, 'reg_lambda': 11.483121504303293, 'min_child_weight': 18, 'colsample_bytree': 0.8535811213420328}. Best is trial 0 with value: 0.4418604651162791.[0m
[32m[I 2023-04-05 11:49:11,894][0m Trial 1 finished with value: 0.40869565217391307 and parameters: {'n_estimators_1': 283, 'max_depth_1': 18, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False, 'split_criterion': 'gini', 'knn_n_neighbors': 3, 'knn_weights': 'distance', 'metric': 'euclidean', 'max_depth': 14, 'subsample': 0.5, 'n_estimators':

In [24]:
optuna.visualization.plot_param_importances(study_2)

In [25]:
optuna.visualization.plot_slice(study_2)

In [26]:
optuna.visualization.plot_optimization_history(study_2)

In [27]:
study_2.best_params

{'n_estimators_1': 355,
 'max_depth_1': 24,
 'min_samples_split': 8,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'bootstrap': False,
 'split_criterion': 'gini',
 'knn_n_neighbors': 1,
 'knn_weights': 'distance',
 'metric': 'manhattan',
 'max_depth': 14,
 'subsample': 0.9000000000000001,
 'n_estimators': 447,
 'eta': 0.06,
 'reg_alpha': 2.2242637694041845,
 'gamma': 0.5909737792575381,
 'reg_lambda': 12.079208074745926,
 'min_child_weight': 10,
 'colsample_bytree': 0.5572600565824948}

In [28]:
all_rf_st_1 = {'n_estimators': 355,
 'max_depth': 24,
 'min_samples_split': 8,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'bootstrap': False,
 'split_criterion': 'gini',}

all_knn_st_1 = {'n_neighbors': 1,
 'weights': 'distance',
 'metric': 'manhattan'}

all_xgb_st_1 = { 'max_depth': 14,
 'subsample': 0.9000000000000001,
 'n_estimators': 447,
 'eta': 0.06,
 'reg_alpha': 2.2242637694041845,
 'gamma': 0.5909737792575381,
 'reg_lambda': 12.079208074745926,
 'min_child_weight': 10,
 'colsample_bytree': 0.5572600565824948}

all_xgb_f1 = {'max_depth': 17,
 'subsample': 0.9,
 'n_estimators': 95,
 'eta': 0.07600000000000001,
 'reg_alpha': 1,
 'gamma': 2.8591358197630745,
 'reg_lambda': 8,
 'min_child_weight': 9,
 'colsample_bytree': 0.4604302286209221}

In [29]:
rf = RandomForestClassifier(**all_rf_st_1)
xgbclf = xgb.XGBClassifier(random_state=42, 
                         tree_method='gpu_hist', 
                         gpu_id=0, 
                         predictor="gpu_predictor"
                         ,**all_xgb_st_1)  
knn = KNeighborsClassifier(**all_knn_st_1)

# Create the ensemble classifier
estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

# Train and evaluate the ensemble classifier
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
eval_clf(y_test, y_pred)

Accuracy : 0.8083333333333333
-----------------------------
Sensitivity/Recall: 0.26495726495726496
-----------------------------
Specificity: 0.9399585921325052
-----------------------------
Precision : 0.5166666666666667
-----------------------------
ROC:
0.6024579285448851
-----------------------------
Confusion_Matrix:
[[454  29]
 [ 86  31]]


In [30]:
from sklearn.ensemble import AdaBoostClassifier
clf1 = xgb.XGBClassifier(**all_xgb_f1, random_state = 42, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf2 = KNeighborsClassifier(**all_knn_st_1)
clf3 = xgb.XGBClassifier(random_state = 42, **all_xgb_st_1, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf4 = RandomForestClassifier(**all_rf_st_1, random_state = 42)
hclf = HierarchicalClassifier(lv1_clf=clf1, clf2=clf2, clf3=clf3, clf4=clf4)
hclf.fit(X_train_xg, y_train_xg, X_train, y_train)
pred = hclf.predict(X_test, X_test_xg)
eval_clf(y_test, pred)

Accuracy : 0.7816666666666666
-----------------------------
Sensitivity/Recall: 0.6752136752136753
-----------------------------
Specificity: 0.8074534161490683
-----------------------------
Precision : 0.45930232558139533
-----------------------------
ROC:
0.7413335456813718
-----------------------------
Confusion_Matrix:
[[390  93]
 [ 38  79]]


## Trial 3

In [None]:
def objective(trial):
    # Define the hyperparameters to be optimized for each classifier
    rf_params = {
        "n_estimators": trial.suggest_int("n_estimators_1", 380, 550, 11),
        "max_depth": trial.suggest_int("max_depth_1", 33, 45),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 5),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 5),
        "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2"]),
        "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
        'split_criterion':trial.suggest_categorical('split_criterion', ['gini', 'entropy'])
    }
    
    knn_params = {
        'n_neighbors': trial.suggest_categorical('knn_n_neighbors', [1, 3 ,5]),
        'weights': trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
        'metric': trial.suggest_categorical('metric', ['minkowski','euclidean','manhattan'])
    }
    
    xgb_params = {
        'max_depth': trial.suggest_int('max_depth', 2, 9),
        'subsample': trial.suggest_discrete_uniform('subsample', 0.85, 1.0, 0.05),
        'n_estimators': trial.suggest_int('n_estimators', 530, 630, 10),
        'eta': trial.suggest_discrete_uniform('eta', 0.005, 0.1, 0.005),
        'reg_alpha': trial.suggest_int('reg_alpha', 2, 9),
        'gamma':trial.suggest_uniform('gamma', 0, 1),
        'reg_lambda': trial.suggest_int('reg_lambda', 9, 31),
        'min_child_weight': trial.suggest_int('min_child_weight', 6, 12),
#         "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
        'nthread' : -1    

    }
    # Create the classifiers
    rf = RandomForestClassifier(**rf_params)
    xgbclf = xgb.XGBClassifier(random_state=42, 
                             tree_method='gpu_hist', 
                             gpu_id=0, 
                             predictor="gpu_predictor"
                             ,**xgb_params )  
    knn = KNeighborsClassifier(**knn_params)

    # Create the ensemble classifier
    estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
    ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

    # Train and evaluate the ensemble classifier
    ensemble.fit(X_train, y_train)
    y_pred = ensemble.predict(X_test)
    f1 = f1_score(y_test, y_pred)
#     recall = recall_score(y_test, y_pred)

    # Return the evaluation metric
    return f1

In [None]:
study_3 = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study_3.optimize(objective, n_trials=1000, show_progress_bar = True)

In [None]:
optuna.visualization.plot_param_importances(study_3)

In [None]:
optuna.visualization.plot_optimization_history(study_3)

In [None]:
optuna.visualization.plot_slice(study_3)

In [None]:
study_3.best_params

In [None]:
all_rf_st_1 = {'n_estimators': 413,
 'max_depth': 36,
 'min_samples_split': 2,
 'min_samples_leaf': 5,
 'max_features': 'log2',
 'bootstrap': True,
 'split_criterion': 'gini',}

all_knn_st_1 = {'n_neighbors': 3,
 'weights': 'uniform',
 'metric': 'manhattan',}

all_xgb_st_1 = { 'max_depth': 8,
 'subsample': 0.9,
 'n_estimators': 600,
 'eta': 0.06,
 'reg_alpha': 9,
 'gamma': 0.8256664454721765,
 'reg_lambda': 21,
 'min_child_weight': 6}

all_xgb_f1 = {'max_depth': 17,
 'subsample': 0.9,
 'n_estimators': 95,
 'eta': 0.07600000000000001,
 'reg_alpha': 1,
 'gamma': 2.8591358197630745,
 'reg_lambda': 8,
 'min_child_weight': 9,
 'colsample_bytree': 0.4604302286209221}

In [None]:
rf = RandomForestClassifier(**all_rf_st_1)
xgbclf = xgb.XGBClassifier(random_state=42, 
                         tree_method='gpu_hist', 
                         gpu_id=0, 
                         predictor="gpu_predictor"
                         ,**all_xgb_st_1)  
knn = KNeighborsClassifier(**all_knn_st_1)

# Create the ensemble classifier
estimators = [('rf', rf), ('xgb', xgbclf), ('knn', knn)]
ensemble = VotingClassifier(estimators=estimators, voting = 'hard')

# Train and evaluate the ensemble classifier
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
eval_clf(y_test, y_pred)

In [None]:
from sklearn.ensemble import AdaBoostClassifier
clf1 = xgb.XGBClassifier(**all_xgb_f1, random_state = 42, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf2 = KNeighborsClassifier(**all_knn_st_1)
clf3 = xgb.XGBClassifier(random_state = 42, **all_xgb_st_1, tree_method='gpu_hist', gpu_id=0, predictor="gpu_predictor")
clf4 = RandomForestClassifier(**all_rf_st_1, random_state = 42)
hclf = HierarchicalClassifier(lv1_clf=clf1, clf2=clf2, clf3=clf3, clf4=clf4)
hclf.fit(X_train_xg, y_train_xg, X_train, y_train)
pred = hclf.predict(X_test, X_test_xg)
eval_clf(y_test, pred)