In [1]:
import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

In [2]:
# Load the training table; every column except the last is treated as a
# feature and the last column as the target.
train = pd.read_csv('train.csv')
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
# Show (n_rows, n_columns) of the raw table as the cell output.
train.shape

(217, 2603)

In [3]:
# Keep only the five descriptors used by this model.  The selection is taken
# from `train` (not from `X_train` itself) so that re-running this cell never
# depends on what a previous run left in `X_train`, and so that it mirrors the
# way `X_test` is built from `test`.
X_train = train.loc[:, ['SM1_Dzs', 'AATS1dv', 'C2SP2.1', 'AATSC0v', 'ATSC2m']]
# Preview the first rows of the reduced feature table.
X_train.head()

Unnamed: 0,SM1_Dzs,AATS1dv,C2SP2.1,AATSC0v,ATSC2m
0,3.062025,11.174603,10,43.713379,337.017311
1,-1.292857,8.266667,12,46.971209,111.863599
2,1.769358,9.673333,15,47.30865,165.223676
3,1.768889,10.424242,11,45.145236,18.615523
4,-0.510204,6.911111,10,54.043565,517.659334


In [4]:
# Load the hold-out table, then split it into the target (last column) and
# the same five descriptors that were selected for training.
test = pd.read_csv('test.csv')
y_test = test.iloc[:, -1]
X_test = test.loc[
    :, ['SM1_Dzs', 'AATS1dv', 'C2SP2.1', 'AATSC0v', 'ATSC2m']
]
# Show (n_rows, n_features) of the test feature table as the cell output.
X_test.shape

The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.


(55, 5)

In [5]:
# Read the stored best parameters.
# The file is opened in a `with` block so the handle is closed as soon as the
# object has been loaded instead of staying open for the whole session.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load parameter files that come from a trusted source.
with open('parameter_AB5.pkl', 'rb') as param_best:
    best_parametersAB = pickle.load(param_best)
# Display the loaded parameter dictionary as the cell output.
best_parametersAB

{'algorithm': 'SAMME.R', 'learning_rate': 0.01, 'n_estimators': 200}

In [6]:
# Linear-kernel SVC that serves as the base estimator of the boosted model;
# probability=True makes the estimator expose predict_proba.
svc = SVC(kernel='linear', probability=True)

In [7]:
# Define the boosted model.  The tuned hyper-parameters are unpacked from
# `best_parametersAB` instead of being retyped by hand, so the model cannot
# drift from the stored values (as displayed when the file was loaded:
# n_estimators=200, learning_rate=0.01, algorithm='SAMME.R').
# random_state is fixed so repeated runs build the same ensemble.
boost = AdaBoostClassifier(base_estimator=svc,
                           random_state=3,
                           **best_parametersAB)

In [8]:
# Train the boosted ensemble on the selected training descriptors; the fitted
# estimator is the cell output.
boost.fit(X=X_train, y=y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
          learning_rate=0.01, n_estimators=200, random_state=3)

In [9]:
# Hard class predictions of the fitted ensemble on the training features.
y_pred_train = boost.predict(X=X_train)

In [10]:
# Hard class predictions of the fitted ensemble on the test features.
y_pred_test = boost.predict(X=X_test)

In [11]:
# Class-probability estimates for the training features (one column per class).
y_proba_train = boost.predict_proba(X=X_train)

In [12]:
# Class-probability estimates for the test features (one column per class).
y_proba_test = boost.predict_proba(X=X_test)

In [13]:
# Keep only the probability of the second class (column 1, i.e.
# boost.classes_[1]).  The column is taken straight from the model rather than
# by slicing `y_proba_train` in place: the in-place form fails with an
# IndexError when the cell is run a second time, because the variable is then
# already one-dimensional.
y_proba_train = boost.predict_proba(X_train)[:, 1]

In [14]:
# Keep only the probability of the second class (column 1, i.e.
# boost.classes_[1]).  The column is taken straight from the model rather than
# by slicing `y_proba_test` in place: the in-place form fails with an
# IndexError when the cell is run a second time, because the variable is then
# already one-dimensional.
y_proba_test = boost.predict_proba(X_test)[:, 1]

In [15]:
#y_proba_train

In [16]:
#y_proba_test

In [17]:
# Area under the ROC curve on the training set, scored with the class
# probabilities; the value is displayed as the cell output.
AUC_train = roc_auc_score(y_true=y_train, y_score=y_proba_train)
AUC_train

0.7016806722689075

In [18]:
# Area under the ROC curve on the test set, scored with the class
# probabilities; the value is displayed as the cell output.
AUC_test = roc_auc_score(y_true=y_test, y_score=y_proba_test)
AUC_test

0.7148541114058355

In [19]:
# ROC curve coordinates (false-positive rate, true-positive rate) for the
# training set (suffix 1) and the test set (suffix 2); the thresholds returned
# as the third value are discarded.
fpr1, tpr1, _ = roc_curve(y_true=y_train, y_score=y_proba_train)
fpr2, tpr2, _ = roc_curve(y_true=y_test, y_score=y_proba_test)

In [20]:
# Training-set evaluation.  The AUC is scored with the class probabilities;
# every other metric uses the hard predictions.
cm_train = confusion_matrix(y_train, y_pred_train)
report_train = classification_report(y_train, y_pred_train)
acc_train = accuracy_score(y_train, y_pred_train)
rocauc_train = roc_auc_score(y_train, y_proba_train)
mcc_train = matthews_corrcoef(y_train, y_pred_train)

print('confusion matrix:')
print(cm_train)
print('classification report:')
print(report_train)
print('accuracy score  :', acc_train)
print('roc_auc score   :', rocauc_train)
print('MCC             :', mcc_train)

confusion matrix:
[[ 36  62]
 [ 18 101]]
classification report:
              precision    recall  f1-score   support

           0       0.67      0.37      0.47        98
           1       0.62      0.85      0.72       119

   micro avg       0.63      0.63      0.63       217
   macro avg       0.64      0.61      0.59       217
weighted avg       0.64      0.63      0.61       217

accuracy score  : 0.631336405529954
roc_auc score   : 0.7016806722689075
MCC             : 0.24872723391760487


In [21]:
# Test-set evaluation.  The AUC is scored with the class probabilities;
# every other metric uses the hard predictions.
cm_test = confusion_matrix(y_test, y_pred_test)
report_test = classification_report(y_test, y_pred_test)
acc_test = accuracy_score(y_test, y_pred_test)
rocauc_test = roc_auc_score(y_test, y_proba_test)
mcc_test = matthews_corrcoef(y_test, y_pred_test)

print('confusion matrix:')
print(cm_test)
print('classification report:')
print(report_test)
print('accuracy score  :', acc_test)
print('roc_auc score   :', rocauc_test)
print('MCC             :', mcc_test)

confusion matrix:
[[11 18]
 [ 1 25]]
classification report:
              precision    recall  f1-score   support

           0       0.92      0.38      0.54        29
           1       0.58      0.96      0.72        26

   micro avg       0.65      0.65      0.65        55
   macro avg       0.75      0.67      0.63        55
weighted avg       0.76      0.65      0.63        55

accuracy score  : 0.6545454545454545
roc_auc score   : 0.7148541114058355
MCC             : 0.41202408119036454


In [22]:
# AUC computed from the thresholded class labels rather than from the
# probabilities.  With a single operating point this reduces, for a binary
# problem, to (TPR + TNR) / 2, i.e. balanced accuracy, which is why it differs
# from the probability-based roc_auc printed in the test-set evaluation.
# The label states this explicitly so the notebook does not show two different
# values under the same "roc_auc score" heading.
print('roc_auc score (hard labels):', roc_auc_score(y_test, y_pred_test))

roc_auc score   : 0.6704244031830239


In [23]:
#plt.plot(fpr1, tpr1, color='red', label='Train')
#plt.plot(fpr2, tpr2, color='black', label='Test')
#plt.xlabel('False Positive Rate')
#plt.ylabel('True Positive Rate')
#plt.title('Receiver Operating Characteristic Model 4')
#plt.legend(loc="lower right")
#plt.savefig('ROC AB5.png', dpi = 1000)