In [None]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#importing datasets
#the column names otherwise come out as arbitrary numbers (see the renaming step below),
#which indicates the files have no header row; header=None keeps the first recording
#as data instead of consuming it as column labels
normal = pd.read_csv('ptbdb_normal.csv', header=None)
abnormal = pd.read_csv('ptbdb_abnormal.csv', header=None)

In [None]:
#viewing normal dataset
normal.head()

In [None]:
#viewing abnormal dataset
abnormal.head()

In [None]:
#dimension for normal
normal.shape

In [None]:
#dimension for abnormal
abnormal.shape

In [None]:
#changing the random column names to sequential - normal
#the existing labels are arbitrary numbers, so relabel the columns 0..n-1 by position
#one assignment renames every column at once; looping over the columns only repeated it
normal.columns = list(range(len(normal.columns)))

In [None]:
#viewing edited columns for normal data
normal.head()

In [None]:
#changing the random column names to sequential - abnormal
#the existing labels are arbitrary numbers, so relabel the columns 0..n-1 by position
#one assignment renames every column at once; looping over the columns only repeated it
abnormal.columns = list(range(len(abnormal.columns)))

In [None]:
#viewing edited columns for abnormal data
abnormal.head()

In [None]:
#stack the normal and abnormal recordings into a single frame
#concatenation leaves the two classes in contiguous runs, so every row is shuffled
#(fixed seed for reproducibility) and the old index is discarded for a fresh 0..n-1 one
stacked = pd.concat([normal, abnormal], axis=0)
shuffled = stacked.sample(frac=1.0, random_state=0)
dataset = shuffled.reset_index(drop=True)

In [None]:
#viewing combined dataset
dataset.head()

In [None]:
dataset.shape

In [None]:
#basic info of statistics
dataset.describe()

In [None]:
#basic information of dataset
dataset.info()

In [None]:
#viewing the uniqueness in dataset
dataset.nunique()

In [None]:
#skewness of the dataset
#the deviation of the distribution of the data from a normal distribution
#+ve mean > median > mode
#-ve mean < median < mode
dataset.skew()

In [None]:
#kurtosis of dataset
#identifies whether the tails of a given distribution contain extreme values
#Leptokurtic indicates a positive excess kurtosis
#mesokurtic distribution shows an excess kurtosis of zero or close to zero
#platykurtic distribution shows a negative excess kurtosis
dataset.kurtosis()

In [None]:
#report whether any cell of the combined dataset is missing (NaN)
has_missing = dataset.isnull().values.any()
print('Any missing data or NaN in the dataset:', has_missing)

In [None]:
#value range of one sample row (the second-to-last row of the dataset, all columns)
sample_row = dataset.iloc[-2,:].values
row_min = np.min(sample_row)
row_max = np.max(sample_row)
print("The minimum and maximum values are {}, {}".format(row_min, row_max))

In [None]:
#correlation for all features in the dataset
correlation_data =dataset.corr()
print(correlation_data)

In [None]:
import seaborn as sns
#visualization for correlation
#the matrix spans every column (column 187 is read as the label further down, so it is at
#least 188 x 188 cells); per-cell annotations would be unreadable and very slow to draw,
#so only the colour scale is shown
plt.figure(figsize=(10,7.5))
sns.heatmap(correlation_data, annot=False, cmap='BrBG')
plt.title('Feature correlation matrix')
plt.show()

In [None]:
#for target value count
label_dataset = dataset[187].value_counts()
label_dataset


In [None]:
#visualization for target label
label_dataset.plot.bar()

In [None]:
#separate the feature columns from the target, which is stored in the last column
label_position = dataset.shape[1] - 1
X = dataset.iloc[:, :label_position].values #independent values / features
y = dataset.iloc[:, label_position].values #dependent values / target

In [None]:
#hold out 30% of the rows for testing; random_state fixes the shuffle so the split is reproducible
#NOTE(review): the split is not stratified - consider stratify=y if the classes are imbalanced
from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
#size for the sets
print('size of X_train:', X_train.shape)
print('size of X_test:', X_test.shape)
print('size of y_train:', y_train.shape)
print('size of y_test:', y_test.shape)

**MACHINE LEARNING ALGORITHMS**

**LOGISTIC REGRESSION**

In [None]:
#Logistic Classifier
from sklearn.linear_model import LogisticRegression
classifier1 = LogisticRegression(random_state=1)
classifier1.fit(X_train,y_train)

In [None]:
y_pred1 = classifier1.predict(X_test)
#print(np.concatenate((y_pred1.reshape(len(y_pred1),1), y_test.reshape(len(y_test),1)),1))

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [None]:
cm1 = confusion_matrix(y_test, y_pred1)
print(cm1)

In [None]:
accuracy_score(y_test, y_pred1)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier1.predict_proba(X_test)[:, 1])

In [None]:
acc1 = cross_val_score(estimator = classifier1, X = X_train, y = y_train, cv = 10)
print("Accuracy of LR: {:.2f} %".format(acc1.mean()*100))
print("SD of LR: {:.2f} %".format(acc1.std()*100))

In [None]:
import sklearn.metrics as metrics
print(metrics.classification_report(y_test, y_pred1))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm1, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV
#hyper-parameter search for logistic regression (10-fold CV, accuracy)
#classifier1 uses the default 'lbfgs' solver, which only supports the l2 penalty, so every
#'l1' candidate would fail to fit; 'liblinear' supports both penalties
parameters_lr = [{'penalty':['l1','l2'],
                  'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                  'solver': ['liblinear']}]
grid_search_lr = GridSearchCV(estimator = classifier1,
                           param_grid = parameters_lr,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_lr.fit(X_train, y_train)
#best_score_ is the mean cross-validated accuracy of the best candidate
best_accuracy_lr = grid_search_lr.best_score_
best_paramaeter_lr = grid_search_lr.best_params_
print("Best Accuracy of LR: {:.2f} %".format(best_accuracy_lr.mean()*100))
print("Best Parameter of LR:", best_paramaeter_lr)

**K-Nearest Neighbours**

In [None]:
#KNearest Neighbour
from sklearn.neighbors import KNeighborsClassifier
classifier2 = KNeighborsClassifier()
classifier2.fit(X_train,y_train)

In [None]:
y_pred2 = classifier2.predict(X_test)
#print(np.concatenate((y_pred2.reshape(len(y_pred2),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm2 = confusion_matrix(y_test, y_pred2)
print(cm2)

In [None]:
accuracy_score(y_test, y_pred2)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier2.predict_proba(X_test)[:, 1])

In [None]:
acc2 = cross_val_score(estimator = classifier2, X = X_train, y = y_train, cv = 10)
print("Accuracy of KNN: {:.2f} %".format(acc2.mean()*100))
print("SD of KNN: {:.2f} %".format(acc2.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred2))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm2, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV
parameters_knn = [{'n_neighbors':[3,5,7,9,11,13,15,17,19,21],
                   'weights': ['uniform','distance'],
                    'metric':['euclidean','manhattan']}]
grid_search_knn = GridSearchCV(estimator = classifier2,
                           param_grid = parameters_knn,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_knn.fit(X_train, y_train)
best_accuracy_knn = grid_search_knn.best_score_
best_paramaeter_knn = grid_search_knn.best_params_  
print("Best Accuracy of KNN: {:.2f} %".format(best_accuracy_knn.mean()*100))
print("Best Parameter of KNN:", best_paramaeter_knn) 

**Support Vector Machine**

In [None]:
#Support Vector Machine
from sklearn.svm import SVC
classifier3 = SVC(kernel = 'linear', random_state =1)
classifier3.fit(X_train,y_train)

In [None]:
y_pred3 = classifier3.predict(X_test)
#print(np.concatenate((y_pred3.reshape(len(y_pred3),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm3 = confusion_matrix(y_test, y_pred3)
print(cm3)

In [None]:
accuracy_score(y_test, y_pred3)

In [None]:
#ROC AUC needs a continuous score; the SVC was built without probability=True, so use
#the signed distance to the separating hyperplane instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier3.decision_function(X_test))

In [None]:
acc3 = cross_val_score(estimator = classifier3, X = X_train, y = y_train, cv = 10)
print("Accuracy of SVM: {:.2f} %".format(acc3.mean()*100))
print("SD of SVM: {:.2f} %".format(acc3.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred3))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm3, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
classifier4 = SVC(kernel = 'rbf', random_state =1)
classifier4.fit(X_train,y_train)

In [None]:
y_pred4 = classifier4.predict(X_test)
#print(np.concatenate((y_pred4.reshape(len(y_pred4),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm4 = confusion_matrix(y_test, y_pred4)
print(cm4)

In [None]:
accuracy_score(y_test, y_pred4)

In [None]:
#ROC AUC needs a continuous score; the SVC was built without probability=True, so use
#the decision function value instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier4.decision_function(X_test))

In [None]:
acc4 = cross_val_score(estimator = classifier4, X = X_train, y = y_train, cv = 10)
print("Accuracy of SVM: {:.2f} %".format(acc4.mean()*100))
print("SD of SVM: {:.2f} %".format(acc4.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred4))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm4, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel':['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel':['rbf'], 'gamma':[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search = GridSearchCV(estimator = classifier4,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_paramaeter = grid_search.best_params_  
print("Best Accuracy of SVM: {:.2f} %".format(best_accuracy.mean()*100))
print("Best Parameter of SVM:", best_paramaeter)            

**Naive Bayes**

In [None]:
#Naive Bayes
from sklearn.naive_bayes import GaussianNB
classifier5 = GaussianNB()
classifier5.fit(X_train,y_train)

In [None]:
y_pred5 = classifier5.predict(X_test)
#print(np.concatenate((y_pred5.reshape(len(y_pred5),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm5 = confusion_matrix(y_test, y_pred5)
print(cm5)

In [None]:
accuracy_score(y_test, y_pred5)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier5.predict_proba(X_test)[:, 1])

In [None]:
acc5 = cross_val_score(estimator = classifier5, X = X_train, y = y_train, cv = 10)
print("Accuracy of NB: {:.2f} %".format(acc5.mean()*100))
print("SD of NB: {:.2f} %".format(acc5.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred5))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm5, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

**Decision Tree**

In [None]:
#Decision Tree
from sklearn.tree import DecisionTreeClassifier
classifier6 = DecisionTreeClassifier(criterion = 'entropy', random_state=1)
classifier6.fit(X_train, y_train)

In [None]:
y_pred6 = classifier6.predict(X_test)
#print(np.concatenate((y_pred6.reshape(len(y_pred6),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm6 = confusion_matrix(y_test, y_pred6)
print(cm6)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm6, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred6)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier6.predict_proba(X_test)[:, 1])

In [None]:
acc6 = cross_val_score(estimator = classifier6, X = X_train, y = y_train, cv = 10)
print("Accuracy of DT: {:.2f} %".format(acc6.mean()*100))
print("SD of DT: {:.2f} %".format(acc6.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred6))

In [None]:
param_grid_dt = {
    'max_depth' : [2,4,6,8,10,12,14,16,18,20],
    'criterion' :['gini', 'entropy']
}
grid_search_dt = GridSearchCV(estimator= classifier6,
                              param_grid=param_grid_dt,
                              scoring='accuracy', 
                              n_jobs=-1)
grid_search_dt.fit(X_train, y_train)
best_accuracy_dt = grid_search_dt.best_score_
best_paramaeter_dt = grid_search_dt.best_params_ 
print("Best Accuracy of dt: {:.2f} %".format(best_accuracy_dt.mean()*100))
print("Best Parameter of dt:", best_paramaeter_dt)

**Random Forest**

In [None]:
#RandomForest
from sklearn.ensemble import RandomForestClassifier
classifier7 = RandomForestClassifier(criterion = 'entropy', random_state=1)
classifier7.fit(X_train, y_train)

In [None]:
y_pred7 = classifier7.predict(X_test)
#print(np.concatenate((y_pred7.reshape(len(y_pred7),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm7 = confusion_matrix(y_test, y_pred7)
print(cm7)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm7, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred7)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier7.predict_proba(X_test)[:, 1])

In [None]:
acc7 = cross_val_score(estimator = classifier7, X = X_train, y = y_train, cv = 10)
print("Accuracy of RF: {:.2f} %".format(acc7.mean()*100))
print("SD of RF: {:.2f} %".format(acc7.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred7))

In [None]:
#hyper-parameter search for the random forest (accuracy, GridSearchCV's default CV)
#'auto' is left out of max_features: for classifiers it was only an alias of 'sqrt', and it
#is deprecated and then removed in newer scikit-learn releases, where it makes the fit fail
param_grid_rf = {
    'n_estimators': [200, 500,700,900,1100],
    'max_features': ['sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8,9,10,20,30,40,50],
    'criterion' :['gini', 'entropy']
}
grid_search_rf = GridSearchCV(estimator= classifier7,
                               param_grid=param_grid_rf,
                               scoring='accuracy',
                               n_jobs=-1)
grid_search_rf.fit(X_train, y_train)
#best_score_ is the mean cross-validated accuracy of the best candidate
best_accuracy_rf = grid_search_rf.best_score_
best_paramaeter_rf = grid_search_rf.best_params_
print("Best Accuracy of rf: {:.2f} %".format(best_accuracy_rf.mean()*100))
print("Best Parameter of rf:", best_paramaeter_rf)

**XGBoost**

In [None]:
#XGBOOST
from xgboost import XGBClassifier
classifier8 = XGBClassifier()
classifier8.fit(X_train,y_train)

In [None]:
y_pred8 = classifier8.predict(X_test)
#print(np.concatenate((y_pred8.reshape(len(y_pred8),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm8 = confusion_matrix(y_test, y_pred8)
print(cm8)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm8, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred8)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier8.predict_proba(X_test)[:, 1])

In [None]:
acc8 = cross_val_score(estimator = classifier8, X = X_train, y = y_train, cv = 10)
print("Accuracy of xgboost: {:.2f} %".format(acc8.mean()*100))
print("SD of xgboost: {:.2f} %".format(acc8.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred8))

In [None]:
# A parameter grid for XGBoost
parameter_xgboost = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5, 7, 9]
        }
grid_search_xgb = GridSearchCV(estimator= classifier8,
                               param_grid=parameter_xgboost,
                               scoring='accuracy', 
                               n_jobs=-1)
grid_search_xgb.fit(X_train, y_train)
best_accuracy_xgb = grid_search_xgb.best_score_
best_paramaeter_xgb = grid_search_xgb.best_params_ 
print("Best Accuracy of XGB: {:.2f} %".format(best_accuracy_xgb.mean()*100))
print("Best Parameter of XGB:", best_paramaeter_xgb)

**AdaBoost**

In [None]:
from sklearn.ensemble import AdaBoostClassifier
classifier9 = AdaBoostClassifier(random_state=1)
classifier9.fit(X_train, y_train)

In [None]:
y_pred9 = classifier9.predict(X_test)
#print(np.concatenate((y_pred9.reshape(len(y_pred9),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm9 = confusion_matrix(y_test, y_pred9)
print(cm9)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm9, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred9)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier9.predict_proba(X_test)[:, 1])

In [None]:
#10-fold cross-validated accuracy of the AdaBoost model (classifier9) on the training split
#the printed labels name AdaBoost; they previously said "xgboost" by copy-paste
acc9 = cross_val_score(estimator = classifier9, X = X_train, y = y_train, cv = 10)
print("Accuracy of AdaBoost: {:.2f} %".format(acc9.mean()*100))
print("SD of AdaBoost: {:.2f} %".format(acc9.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred9))

In [None]:
from sklearn.model_selection import GridSearchCV
parameters_ada = [{'n_estimators':[100,200,300,400,500,1000,1500,2000],
                   'learning_rate':[0.001,0.01,0.1,1]}]
grid_search_ada = GridSearchCV(estimator = classifier9,
                           param_grid = parameters_ada,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_ada.fit(X_train, y_train)
best_accuracy_ada = grid_search_ada.best_score_
best_paramaeter_ada = grid_search_ada.best_params_  
print("Best Accuracy of adaboost: {:.2f} %".format(best_accuracy_ada.mean()*100))
print("Best Parameter of adaboost:", best_paramaeter_ada) 

**Gradient Boost**

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
classifier10= GradientBoostingClassifier(n_estimators=20, learning_rate=0.5, max_features=2, max_depth=2, random_state=0)
classifier10.fit(X_train, y_train)

In [None]:
y_pred10 = classifier10.predict(X_test)
#print(np.concatenate((y_pred10.reshape(len(y_pred10),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm10 = confusion_matrix(y_test, y_pred10)
print(cm10)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm10, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred10)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier10.predict_proba(X_test)[:, 1])

In [None]:
acc10 = cross_val_score(estimator = classifier10, X = X_train, y = y_train, cv = 10)
print("Accuracy of gbboost: {:.2f} %".format(acc10.mean()*100))
print("SD of gbboost: {:.2f} %".format(acc10.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred10))

In [None]:
from sklearn.model_selection import GridSearchCV
parameters_gb = [{'learning_rate':[0.15,0.1,0.05,0.01,0.005,0.001], 
                  'n_estimators':[100,250,500,750,1000,1250,1500,1750]}]
grid_search_gb = GridSearchCV(estimator = classifier10,
                           param_grid = parameters_gb,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_gb.fit(X_train, y_train)
best_accuracy_gb = grid_search_gb.best_score_
best_paramaeter_gb = grid_search_gb.best_params_  
print("Best Accuracy of gradient boost: {:.2f} %".format(best_accuracy_gb.mean()*100))
print("Best Parameter of gradient boost:", best_paramaeter_gb) 

**LightGBM**

In [None]:
import lightgbm as lgb
classifier11= lgb.LGBMClassifier()
classifier11.fit(X_train, y_train)

In [None]:
y_pred11 = classifier11.predict(X_test)
#print(np.concatenate((y_pred11.reshape(len(y_pred11),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm11 = confusion_matrix(y_test, y_pred11)
print(cm11)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm11, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred11)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier11.predict_proba(X_test)[:, 1])

In [None]:
acc11 = cross_val_score(estimator = classifier11, X = X_train, y = y_train, cv = 10)
print("Accuracy of lightgbm: {:.2f} %".format(acc11.mean()*100))
print("SD of lightgbm: {:.2f} %".format(acc11.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred11))

In [None]:
from sklearn.model_selection import GridSearchCV
parameters_lb = [{'n_estimators': [400, 700, 1000,1500],
    'colsample_bytree': [0.7, 0.8, 0.9],
    'max_depth': [15,20,25,30,35],
    'num_leaves': [50, 100, 200,300],
    'reg_alpha': [1.1, 1.2, 1.3],
    'reg_lambda': [1.1, 1.2, 1.3],
    'min_split_gain': [0.3, 0.4],
    'subsample': [0.7, 0.8, 0.9],
    'subsample_freq': [20]}]
grid_search_lb = GridSearchCV(estimator = classifier11,
                           param_grid = parameters_lb,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_lb.fit(X_train, y_train)
best_accuracy_lb = grid_search_lb.best_score_
best_paramaeter_lb = grid_search_lb.best_params_  
print("Best Accuracy of light boost: {:.2f} %".format(best_accuracy_lb.mean()*100))
print("Best Parameter of light boost:", best_paramaeter_lb) 

**CatBoost**

In [None]:
from catboost import CatBoostClassifier
from sklearn.model_selection import cross_val_score
classifier12 = CatBoostClassifier(max_depth=3)
classifier12.fit(X_train, y_train)

In [None]:
y_pred12 = classifier12.predict(X_test)
#print(np.concatenate((y_pred12.reshape(len(y_pred12),1), y_test.reshape(len(y_test),1)),1))

In [None]:
cm12 = confusion_matrix(y_test, y_pred12)
print(cm12)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
 
fig, ax = plot_confusion_matrix(conf_mat=cm12, figsize=(6, 6), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
accuracy_score(y_test, y_pred12)

In [None]:
#ROC AUC measures how well the model ranks samples, so score it with the predicted
#probability of the positive class instead of the hard 0/1 predictions
roc_auc_score(y_test, classifier12.predict_proba(X_test)[:, 1])

In [None]:
acc12 = cross_val_score(estimator = classifier12, X = X_train, y = y_train, cv = 10)
print("Accuracy of catboost: {:.2f} %".format(acc12.mean()*100))
print("SD of catboost: {:.2f} %".format(acc12.std()*100))

In [None]:
print(metrics.classification_report(y_test, y_pred12))

In [None]:
from sklearn.model_selection import GridSearchCV
#hyper-parameter search for CatBoost (10-fold CV, accuracy)
#classifier12 was built with max_depth=3; CatBoost treats 'depth' and 'max_depth' as synonyms
#and refuses to fit when both are set, so the tree depth is tuned through 'max_depth'
#every grid entry must be a list of candidate values, hence thread_count is wrapped as [4]
#NOTE(review): ctr settings only concern categorical features and 'ctr_border_count' may not
#be accepted by current CatBoost releases - confirm, or drop it from the grid
parameters_cat = [{'max_depth':[3,1,2,6,4,5,7,8,9,10],
          'iterations':[250,100,500,1000],
          'learning_rate':[0.03,0.001,0.01,0.1,0.2,0.3],
          'l2_leaf_reg':[3,1,5,10,100],
          'border_count':[32,5,10,20,50,100,200],
          'ctr_border_count':[50,5,10,20,100,200],
          'thread_count':[4]}]
grid_search_cat = GridSearchCV(estimator = classifier12,
                           param_grid = parameters_cat,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search_cat.fit(X_train, y_train)
#best_score_ is the mean cross-validated accuracy of the best candidate
best_accuracy_cat = grid_search_cat.best_score_
best_paramaeter_cat = grid_search_cat.best_params_
print("Best Accuracy of cat boost: {:.2f} %".format(best_accuracy_cat.mean()*100))
print("Best Parameter of cat boost:", best_paramaeter_cat)