In [1]:
import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

In [2]:
# Load the training table. Every column except the last is taken as a
# feature; the last column is used as the class label.
train = pd.read_csv('train.csv')
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
train.shape

(217, 2603)

In [3]:
# Restrict the training features to the five columns used by this model.
# Select from `train` instead of from `X_train` itself, so the cell neither
# depends on nor overwrites its own previous result, and so it mirrors the
# way `X_test` is built from `test`.
X_train = train.loc[:, ['SM1_Dzs', 'AATS1dv', 'C2SP2.1', 'AATSC0v', 'ATSC2m']]
X_train.head()

Unnamed: 0,SM1_Dzs,AATS1dv,C2SP2.1,AATSC0v,ATSC2m
0,3.062025,11.174603,10,43.713379,337.017311
1,-1.292857,8.266667,12,46.971209,111.863599
2,1.769358,9.673333,15,47.30865,165.223676
3,1.768889,10.424242,11,45.145236,18.615523
4,-0.510204,6.911111,10,54.043565,517.659334


In [4]:
# Load the held-out table and keep the same five feature columns that the
# training set uses; the last column is the class label.
test = pd.read_csv('test.csv')
X_test, y_test = (
    test.loc[:, ['SM1_Dzs', 'AATS1dv', 'C2SP2.1', 'AATSC0v', 'ATSC2m']],
    test.iloc[:, -1],
)
X_test.shape

(55, 5)

In [5]:
# AdaBoost classifier with a fixed seed for reproducibility. The ensemble
# size and learning rate are written out explicitly; they are the same values
# the fitted estimator reports (n_estimators=50, learning_rate=1.0).
boost = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1.0,
    random_state=3,
)

In [6]:
# Train the boosted ensemble on the selected training features and labels.
# The fitted estimator is the cell's displayed value.
boost.fit(X=X_train, y=y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=3)

In [7]:
# Hard class predictions for the training set.
y_pred_train = boost.predict(X=X_train)

In [8]:
# Hard class predictions for the held-out test set.
y_pred_test = boost.predict(X=X_test)

In [9]:
# Per-class probability estimates for the training set (one column per class).
y_proba_train = boost.predict_proba(X=X_train)

In [10]:
# Per-class probability estimates for the test set (one column per class).
y_proba_test = boost.predict_proba(X=X_test)

In [11]:
# Keep only the probability of the second class (column 1, i.e.
# boost.classes_[1]) for the training set. Recompute from the model instead
# of slicing `y_proba_train` in place: the self-referential slice raised
# IndexError when this cell was run a second time, because the array was
# already 1-D by then.
y_proba_train = boost.predict_proba(X_train)[:, 1]

In [12]:
# Keep only the probability of the second class (column 1, i.e.
# boost.classes_[1]) for the test set. Recompute from the model instead of
# slicing `y_proba_test` in place: the self-referential slice raised
# IndexError when this cell was run a second time, because the array was
# already 1-D by then.
y_proba_test = boost.predict_proba(X_test)[:, 1]

In [13]:
#y_proba_train

In [14]:
#y_proba_test

In [15]:
# Area under the ROC curve on the training set, from the class-1 scores.
AUC_train = roc_auc_score(y_true=y_train, y_score=y_proba_train)
AUC_train

0.9507803121248499

In [16]:
# Area under the ROC curve on the test set, from the class-1 scores.
AUC_test = roc_auc_score(y_true=y_test, y_score=y_proba_test)
AUC_test

0.6379310344827587

In [17]:
# ROC curve points (false-positive rate, true-positive rate) for the training
# set (fpr1/tpr1) and the test set (fpr2/tpr2). The third return value, the
# thresholds, is discarded.
fpr1, tpr1, _ = roc_curve(y_true=y_train, y_score=y_proba_train)
fpr2, tpr2, _ = roc_curve(y_true=y_test, y_score=y_proba_test)

In [18]:
# Training-set evaluation: confusion matrix and per-class report from the
# hard predictions, then accuracy, ROC AUC (from the class-1 scores) and the
# Matthews correlation coefficient.
print('confusion matrix:')
print(confusion_matrix(y_true=y_train, y_pred=y_pred_train))
print('classification report:')
print(classification_report(y_true=y_train, y_pred=y_pred_train))
print('accuracy score  :', accuracy_score(y_true=y_train, y_pred=y_pred_train))
print('roc_auc score   :', roc_auc_score(y_true=y_train, y_score=y_proba_train))
print('MCC             :', matthews_corrcoef(y_true=y_train, y_pred=y_pred_train))

confusion matrix:
[[ 81  17]
 [  9 110]]
classification report:
              precision    recall  f1-score   support

           0       0.90      0.83      0.86        98
           1       0.87      0.92      0.89       119

   micro avg       0.88      0.88      0.88       217
   macro avg       0.88      0.88      0.88       217
weighted avg       0.88      0.88      0.88       217

accuracy score  : 0.880184331797235
roc_auc score   : 0.9507803121248499
MCC             : 0.7584827636097249


In [19]:
# Test-set evaluation: confusion matrix and per-class report from the hard
# predictions, then accuracy, ROC AUC (from the class-1 scores) and the
# Matthews correlation coefficient.
print('confusion matrix:')
print(confusion_matrix(y_true=y_test, y_pred=y_pred_test))
print('classification report:')
print(classification_report(y_true=y_test, y_pred=y_pred_test))
print('accuracy score  :', accuracy_score(y_true=y_test, y_pred=y_pred_test))
print('roc_auc score   :', roc_auc_score(y_true=y_test, y_score=y_proba_test))
print('MCC             :', matthews_corrcoef(y_true=y_test, y_pred=y_pred_test))

confusion matrix:
[[12 17]
 [ 5 21]]
classification report:
              precision    recall  f1-score   support

           0       0.71      0.41      0.52        29
           1       0.55      0.81      0.66        26

   micro avg       0.60      0.60      0.60        55
   macro avg       0.63      0.61      0.59        55
weighted avg       0.63      0.60      0.59        55

accuracy score  : 0.6
roc_auc score   : 0.6379310344827587
MCC             : 0.2392844844320316


In [20]:
#plt.plot(fpr1, tpr1, color='red', label='Model 4')
#plt.xlabel('False Positive Rate')
#plt.ylabel('True Positive Rate')
#plt.title('Receiver Operating Characteristic Train Set')
#plt.legend(loc="lower right")
#plt.savefig('ROC AB5 train.png', dpi = 1000)

In [21]:
#plt.plot(fpr2, tpr2, color='black', label='Model 4 - AB 5 (AUC = %0.2f)' % AUC_test)
#plt.xlabel('False Positive Rate')
#plt.ylabel('True Positive Rate')
#plt.title('Receiver Operating Characteristic Test Set')
#plt.legend(loc="lower right")
#plt.savefig('ROC AB5 test.png', dpi = 1000)

In [22]:
#plt.plot(fpr1, tpr1, color='red', label='Model 4 - AB 5 (AUC = %0.2f)' % AUC_train)
#plt.plot(fpr2, tpr2, color='black', label='Model 4 - AB 5 (AUC = %0.2f)' % AUC_test)
#plt.xlabel('False Positive Rate')
#plt.ylabel('True Positive Rate')
#plt.title('Receiver Operating Characteristic Model 4 (5 AB)')
#plt.legend(loc="lower right")
#plt.savefig('ROC AB5.png', dpi = 1000)