In [6]:
import pandas as pd 
import numpy as np

## Loading the dataset

In [7]:
iris = pd.read_csv("iris.arff")
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Question 1 

    - Training using diff models and 10 fold cross validation

In [8]:
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class '], dtype='object')

In [9]:
## Did this as class had a space after it for some reason

iris = iris.rename(columns={'class ':'class'}) 

In [10]:
iris['class'] = iris['class'].astype('category').cat.codes

In [11]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


#### Loading the classifiers

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import sklearn

from sklearn.model_selection import cross_val_predict # This is for cross-validation
from sklearn.model_selection import cross_val_score # This is for cross-validation

#### Splitting into features and output label

In [13]:
X = iris.iloc[:,:4]
X.head(2)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


In [14]:
y = iris['class']
y.head(2)

0    0
1    0
Name: class, dtype: int8

### KNN

In [35]:
model_knn = KNeighborsClassifier(n_neighbors=3)

accuracy = cross_val_score(model_knn, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model_knn, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model_knn, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model_knn, X, y, scoring='f1_macro', cv = 10)



print("Accuracy of KNN with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy of KNN with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity of KNN with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score of KNN with Cross Validation is:",f1_score.mean() * 100)




ypred = cross_val_predict(model_knn,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Matthews correlation coefficient",mcc)




Accuracy of KNN with Cross Validation is: 96.66666666666666
Balanced_accuracy of KNN with Cross Validation is: 96.66666666666666
Sensitivity of KNN with Cross Validation is: 96.66666666666666
f1_score of KNN with Cross Validation is: 96.59090909090908
Matthews correlation coefficient 0.9500633396673706


In [36]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model_knn,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)




Confusion Matrix  [[50  0  0]
 [ 0 47  3]
 [ 0  2 48]]


In [37]:
def specificity(y,ypred):
        cm=confusion_matrix(y, ypred)
        FP = cm.sum(axis=0) - np.diag(cm)  
        FN = cm.sum(axis=1) - np.diag(cm)
        TP = np.diag(cm)
        TN = cm.sum() - (FP + FN + TP)
        Specificity = TN/(TN+FP)    
        return np.mean(Specificity)

In [38]:

print("Specificity",specificity(y,ypred))

Specificity 0.9833333333333334


## SVC

In [19]:
model = SVC(C=1,kernel='linear')

accuracy = cross_val_score(model, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model, X, y, scoring='f1_macro', cv = 10)



print("Accuracy with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy  with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score with Cross Validation is:",f1_score.mean() * 100)



ypred = cross_val_predict(model,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Mathews correlation coefficient",mcc)




Accuracy with Cross Validation is: 97.33333333333334
Balanced_accuracy  with Cross Validation is: 97.33333333333333
Sensitivity with Cross Validation is: 97.33333333333333
f1_score with Cross Validation is: 97.26430976430976
Mathews correlation coefficient 0.9602561024455323


In [20]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)





Confusion Matrix  [[50  0  0]
 [ 0 47  3]
 [ 0  1 49]]


In [21]:

print("Specificity",specificity(y,ypred))

Specificity 0.9866666666666667


## ETC

In [22]:
model = ExtraTreeClassifier(max_depth=2,random_state=10)
accuracy = cross_val_score(model, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model, X, y, scoring='f1_macro', cv = 10)



print("Accuracy with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy  with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score with Cross Validation is:",f1_score.mean() * 100)



ypred = cross_val_predict(model,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Mathews correlation coefficient",mcc)




Accuracy with Cross Validation is: 93.33333333333333
Balanced_accuracy  with Cross Validation is: 93.33333333333333
Sensitivity with Cross Validation is: 93.33333333333333
f1_score with Cross Validation is: 93.27272727272727
Mathews correlation coefficient 0.9002400960426865


In [23]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)




Confusion Matrix  [[48  2  0]
 [ 0 45  5]
 [ 0  3 47]]


In [24]:
print("Specificity",specificity(y,ypred))




Specificity 0.9666666666666667


## Bagging

In [25]:
model = BaggingClassifier(n_estimators=10,random_state=10)

accuracy = cross_val_score(model, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model, X, y, scoring='f1_macro', cv = 10)



print("Accuracy with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy  with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score with Cross Validation is:",f1_score.mean() * 100)



ypred = cross_val_predict(model,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Mathews correlation coefficient",mcc)



Accuracy with Cross Validation is: 95.33333333333334
Balanced_accuracy  with Cross Validation is: 95.33333333333331
Sensitivity with Cross Validation is: 95.33333333333331
f1_score with Cross Validation is: 95.2861952861953
Mathews correlation coefficient 0.930062006200689


In [26]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)



Confusion Matrix  [[50  0  0]
 [ 0 47  3]
 [ 0  4 46]]


In [27]:

print("Specificity",specificity(y,ypred))

Specificity 0.9766666666666666


## DTC

In [28]:
model = DecisionTreeClassifier(max_depth = 2)


accuracy = cross_val_score(model, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model, X, y, scoring='f1_macro', cv = 10)



print("Accuracy with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy  with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score with Cross Validation is:",f1_score.mean() * 100)



ypred = cross_val_predict(model,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Mathews correlation coefficient",mcc)



Accuracy with Cross Validation is: 95.33333333333334
Balanced_accuracy  with Cross Validation is: 94.66666666666667
Sensitivity with Cross Validation is: 95.33333333333334
f1_score with Cross Validation is: 95.25757575757575
Mathews correlation coefficient 0.9305585027027279


In [29]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)






Confusion Matrix  [[50  0  0]
 [ 0 48  2]
 [ 0  5 45]]


In [30]:
print("Specificity",specificity(y,ypred))



Specificity 0.9766666666666666


## LR

In [31]:
model = LogisticRegression(solver='liblinear',multi_class='ovr')

accuracy = cross_val_score(model, X, y, scoring='accuracy', cv = 10)

balanced_accuracy = cross_val_score(model, X, y, scoring='balanced_accuracy', cv = 10)

sensitivity = cross_val_score(model, X, y, scoring='recall_macro', cv = 10)

f1_score = cross_val_score(model, X, y, scoring='f1_macro', cv = 10)



print("Accuracy with Cross Validation is:",accuracy.mean() * 100)
print("Balanced_accuracy  with Cross Validation is:",balanced_accuracy.mean() * 100)
print("Sensitivity with Cross Validation is:",sensitivity.mean() * 100)
print("f1_score with Cross Validation is:",f1_score.mean() * 100)



ypred = cross_val_predict(model,X, y, cv=10)
mcc = sklearn.metrics.matthews_corrcoef(y, ypred)
print ("Mathews correlation coefficient",mcc)




Accuracy with Cross Validation is: 95.33333333333334
Balanced_accuracy  with Cross Validation is: 95.33333333333334
Sensitivity with Cross Validation is: 95.33333333333334
f1_score with Cross Validation is: 95.2861952861953
Mathews correlation coefficient 0.9305585027027279


In [32]:
from sklearn.metrics import confusion_matrix

ypred = cross_val_predict(model,X, y, cv=10)
conf_mat = confusion_matrix(y, ypred)
print("Confusion Matrix ",conf_mat)







Confusion Matrix  [[50  0  0]
 [ 0 45  5]
 [ 0  2 48]]


In [33]:
print("Specificity",specificity(y,ypred))




Specificity 0.9766666666666666
