In [35]:
import pandas as pd 
import numpy as np

## Loading the dataset

In [36]:
# Load the Iris table; despite the .arff extension the file is parsed as CSV.
iris = pd.read_csv("iris.arff", sep=",")
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [37]:
## The 'class' header was read with a trailing space. Strip whitespace from
## every column label instead of renaming that one exact spelling, so any
## other padding variant cannot slip through to the column lookups below.

iris = iris.rename(columns=str.strip)

In [38]:
# Swap the species strings for integer category codes, stored back in 'class'.
species_categories = iris['class'].astype('category')
iris['class'] = species_categories.cat.codes

In [39]:
# Preview again: the 'class' column should now hold integer codes.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [40]:
# Feature matrix: the four leading columns of the frame, selected by position.
X = iris.iloc[:, 0:4]
X.head(n=2)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


In [41]:
# Target vector: the integer-coded 'class' column.
y = iris.loc[:, 'class']
y.head(n=2)

0    0
1    0
Name: class, dtype: int8

## Loading the base classifiers

In [42]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict # This is for cross-validation
from sklearn.model_selection import cross_val_score # This is for cross-validation

In [43]:
!pip install mlxtend



In [44]:
from mlxtend.classifier import StackingClassifier

## 1st stacked model

In [45]:
# Base learners for the first stack: k-NN, a bagging ensemble and a shallow
# extremely-randomized tree (the stochastic ones are seeded).
clf1, clf2, clf3 = (
    KNeighborsClassifier(n_neighbors=3),
    BaggingClassifier(random_state=42, n_estimators=10),
    ExtraTreeClassifier(random_state=42, max_depth=2),
)

In [46]:
# Stack the three base learners; with use_probas=True the logistic-regression
# meta-classifier is configured to consume their class probabilities.
sclf = StackingClassifier(
    use_probas=True,
    meta_classifier=LogisticRegression(multi_class='ovr', solver='liblinear'),
    classifiers=[clf1, clf2, clf3],
)


## Results for 1st stacked model

In [47]:
print('10-fold cross validation: for 1st stacked model')

# Score whatever estimator the loop hands us (`clf`) instead of hard-coding
# `sclf`, so adding another estimator to the list evaluates that estimator
# rather than silently re-scoring the same one.
for clf, label in zip([sclf],
                      ['StackingClassifier']):

    # One array of per-fold scores for each metric (10 folds each).
    accuracy = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
    balanced_accuracy = cross_val_score(clf, X, y, scoring='balanced_accuracy', cv=10)
    sensitivity = cross_val_score(clf, X, y, scoring='recall_macro', cv=10)
    f1_score = cross_val_score(clf, X, y, scoring='f1_macro', cv=10)

    # Report the mean over the folds.
    print("Accuracy: %0.2f " % (accuracy.mean()))
    print("Balanced_accuracy: %0.2f " % (balanced_accuracy.mean()))
    print("Sensitivity: %0.2f " % (sensitivity.mean()))
    print("f1_score: %0.2f " % (f1_score.mean()))

    # MCC is computed once over the pooled out-of-fold predictions.
    ypred = cross_val_predict(clf, X, y, cv=10)
    mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
    print ("Mathews correlation coefficient",mcc)


10-fold cross validation: for 1st stacked model
Accuracy: 0.97 
Balanced_accuracy: 0.97 
Sensitivity: 0.97 
f1_score: 0.97 
Mathews correlation coefficient 0.9500633396673706


In [48]:
from sklearn.metrics import confusion_matrix

# Out-of-fold predictions of the 1st stacked model and their confusion matrix.
ypred = cross_val_predict(estimator=sclf, X=X, y=y, cv=10)
conf_mat = confusion_matrix(y_true=y, y_pred=ypred)
print("Confusion Matrix ", conf_mat)





Confusion Matrix  [[50  0  0]
 [ 0 47  3]
 [ 0  2 48]]


In [49]:
def specificity(y, ypred):
    """Return the unweighted mean of the per-class specificities.

    Each class is treated one-vs-rest using the confusion matrix of `y`
    (true labels) against `ypred` (predicted labels); its specificity is
    TN / (TN + FP). The per-class values are then averaged.
    """
    matrix = confusion_matrix(y, ypred)
    true_pos = np.diag(matrix)
    # Column total minus the diagonal: predicted as the class but wrong.
    false_pos = matrix.sum(axis=0) - true_pos
    # Row total minus the diagonal: belongs to the class but missed.
    false_neg = matrix.sum(axis=1) - true_pos
    # Everything not counted in the three groups above.
    true_neg = matrix.sum() - (false_pos + false_neg + true_pos)
    per_class = true_neg / (true_neg + false_pos)
    return np.mean(per_class)

In [50]:

# In top-to-bottom execution order `ypred` holds the 1st stacked model's
# cross-validated predictions at this point.
print("Specificity",specificity(y,ypred))

Specificity 0.9833333333333334


## 2nd stacked model

In [51]:
# Base learners for the second stack: k-NN with 2 neighbours, a depth-3
# decision tree and the same shallow extremely-randomized tree as before.
clf1, clf2, clf3 = (
    KNeighborsClassifier(n_neighbors=2),
    DecisionTreeClassifier(random_state=42, max_depth=3),  ## changed this classifier
    ExtraTreeClassifier(random_state=42, max_depth=2),
)

# Same stacking set-up as the first model, built on the new base learners.
sclf_2 = StackingClassifier(
    use_probas=True,
    meta_classifier=LogisticRegression(multi_class='ovr', solver='liblinear'),
    classifiers=[clf1, clf2, clf3],
)



## Results for 2nd stacked model

In [52]:
print('10-fold cross validation: for 2nd stacked model')

# Iterate over the model this cell actually reports on (`sclf_2`, not the
# first stack) and score the loop variable, so the header and body agree.
for clf, label in zip([sclf_2],
                      ['StackingClassifier']):

    # One array of per-fold scores for each metric (10 folds each).
    accuracy_2 = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
    balanced_accuracy_2 = cross_val_score(clf, X, y, scoring='balanced_accuracy', cv=10)
    sensitivity_2 = cross_val_score(clf, X, y, scoring='recall_macro', cv=10)
    f1_score_2 = cross_val_score(clf, X, y, scoring='f1_macro', cv=10)

    # Report the mean over the folds.
    print("Accuracy: %0.2f " % (accuracy_2.mean()))
    print("Balanced_accuracy: %0.2f " % (balanced_accuracy_2.mean()))
    print("Sensitivity: %0.2f " % (sensitivity_2.mean()))
    print("f1_score: %0.2f " % (f1_score_2.mean()))

    # Keep the `_2` suffix here as well: writing to `ypred` / `mcc` would
    # overwrite the first model's results and make earlier cells report
    # model-2 numbers if they are re-run.
    ypred_2 = cross_val_predict(clf, X, y, cv=10)
    mcc_2 = sklearn.metrics.matthews_corrcoef(y, ypred_2)
    print ("Mathews correlation coefficient",mcc_2)



10-fold cross validation: for 2nd stacked model
Accuracy: 0.96 
Balanced_accuracy: 0.96 
Sensitivity: 0.96 
f1_score: 0.96 
Mathews correlation coefficient 0.9402507669779171


In [53]:
from sklearn.metrics import confusion_matrix

# Out-of-fold predictions of the 2nd stacked model and their confusion matrix.
ypred_2 = cross_val_predict(estimator=sclf_2, X=X, y=y, cv=10)
conf_mat = confusion_matrix(y_true=y, y_pred=ypred_2)
print("Confusion Matrix ", conf_mat)






Confusion Matrix  [[50  0  0]
 [ 0 46  4]
 [ 0  2 48]]


In [54]:

# Mean per-class specificity for the 2nd stacked model's predictions.
print("Specificity",specificity(y,ypred_2))

Specificity 0.98
