In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import ADASYN
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.over_sampling import SVMSMOTE
from bayes_opt import BayesianOptimization
from mlxtend.classifier import StackingClassifier
import matplotlib.pyplot as plt
import warnings; warnings.filterwarnings(action='ignore')

In [None]:
# Load the tab-separated train / test matrices as float arrays.
pathtrain = "/path of train.txt/"
pathtest = "/path of test.txt/"
datatrain = np.loadtxt(pathtrain, delimiter='\t', dtype=float)
datatest = np.loadtxt(pathtest, delimiter='\t', dtype=float)

# Column-wise split at index 3: the first three columns are the features and
# every remaining column is kept (still 2-D) as the label part.
train_data_nosmo = datatrain[:, :3]
train_label_nosmo = datatrain[:, 3:]
test_data = datatest[:, :3]
test_label = datatest[:, 3:]

# Oversample the training split only; the test split stays as loaded.
# Commented-out alternatives that were present in this cell:
#   smo = BorderlineSMOTE(kind='borderline-2', random_state=38, k_neighbors=3)
#   ada = ADASYN(random_state=5, n_neighbors=3)  # then ada.fit_resample(...)
smo = SMOTE(k_neighbors=5, random_state=1)
train_data, train_label = smo.fit_resample(train_data_nosmo, train_label_nosmo)
# To inspect the resampled class balance: print(Counter(train_label))
# NOTE(review): Counter is not among the imports visible in the import cell.

In [None]:
# BayesianOptimization for SVM
# Search space: one (lower, upper) bound pair per argument of svm_cv.
hparams = dict(
    c=(0.5, 1000),    # forwarded by svm_cv as SVC's C
    gamma=(1e-05, 1),  # forwarded by svm_cv as SVC's gamma
)


def svm_cv(c, gamma):
    """Objective for the SVM search: mean 5-fold cross-validated ROC AUC.

    Args:
        c: value forwarded as the SVC ``C`` parameter.
        gamma: value forwarded as the SVC ``gamma`` parameter (RBF kernel).

    Returns:
        The mean of the five per-fold ``'roc_auc'`` scores obtained with
        ``cross_val_score`` on the module-level ``train_data`` /
        ``train_label``.
    """
    # probability=True is deliberately not set here: the 'roc_auc' scorer
    # ranks samples with SVC.decision_function, so the additional internal
    # cross-validation that fits probability estimates would slow down every
    # fit without affecting the returned score.
    model = svm.SVC(C=c, gamma=gamma, kernel='rbf',
                    decision_function_shape='ovo', random_state=0)
    fold_scores = cross_val_score(model, train_data, train_label,
                                  scoring='roc_auc', cv=5)
    return fold_scores.mean()



# Seed the optimiser so the sequence of probed points, and therefore the
# reported optimum, is reproducible after a kernel restart.
svm_bo = BayesianOptimization(f=svm_cv, pbounds=hparams, random_state=0)

# 50 guided iterations; the number of initial random probes is left at the
# library default.
svm_bo.maximize(n_iter=50)
svm_bo.max

In [None]:
# BayesianOptimization for RF
def rf_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Objective for the random-forest search: mean 5-fold ROC AUC.

    The count-like arguments (``n_estimators``, ``min_samples_split``,
    ``max_depth``) are truncated with ``int()`` before being handed to the
    estimator; ``max_features`` is passed as a float capped at 0.999.

    Returns:
        The mean of the five per-fold ``'roc_auc'`` scores obtained with
        ``cross_val_score`` on the module-level ``train_data`` /
        ``train_label``.
    """
    forest = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),  # keep it a float fraction
        max_depth=int(max_depth),
        random_state=0,
    )
    fold_scores = cross_val_score(forest, train_data, train_label,
                                  scoring='roc_auc', cv=5)
    return fold_scores.mean()

# (lower, upper) bounds for each argument of rf_cv. The optimiser is seeded so
# the search is reproducible after a kernel restart.
rf_bo = BayesianOptimization(
    f=rf_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 15),
    },
    random_state=0,
)

# No explicit budget is passed, so the library defaults for init_points and
# n_iter apply; pass n_iter=... here to change the number of guided steps.
rf_bo.maximize()
rf_bo.max

In [None]:
# BayesianOptimization for GBDT
def gbdt_cv(n_estimators, min_samples_split, max_features, max_depth, learning_rate, subsample):
    """Objective for the gradient-boosting search: mean 5-fold ROC AUC.

    The count-like arguments (``n_estimators``, ``min_samples_split``,
    ``max_depth``) are truncated with ``int()``; the fractional ones
    (``max_features``, ``learning_rate``, ``subsample``) are each capped at
    0.999 before being handed to the estimator.

    Returns:
        The mean of the five per-fold ``'roc_auc'`` scores obtained with
        ``cross_val_score`` on the module-level ``train_data`` /
        ``train_label``.
    """
    cap = 0.999  # upper limit applied to every fractional hyper-parameter
    booster = GradientBoostingClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, cap),
        max_depth=int(max_depth),
        learning_rate=min(learning_rate, cap),
        subsample=min(subsample, cap),
        random_state=1123,
    )
    fold_scores = cross_val_score(booster, train_data, train_label,
                                  scoring='roc_auc', cv=5)
    return fold_scores.mean()

# (lower, upper) bounds for each argument of gbdt_cv. The optimiser is seeded
# so the search is reproducible after a kernel restart.
gbdt_bo = BayesianOptimization(
    f=gbdt_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (3, 15),
        'learning_rate': (0.1, 0.999),
        'subsample': (0.1, 0.999),
    },
    random_state=0,
)

# No explicit budget is passed, so the library defaults for init_points and
# n_iter apply; pass n_iter=... here to change the number of guided steps.
gbdt_bo.maximize()
gbdt_bo.max