In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from numpy import array 

bend_df = pd.read_csv("bend.csv")
X = bend_df.Value
le = LabelEncoder()
Y = le.fit_transform(bend_df.State)

In [2]:
from sklearn.model_selection import train_test_split
X_train,X_test, Y_train,Y_test = train_test_split(X, Y,train_size=0.7,random_state=1)

#model.fit requires [[], [], []] format.
X_train = array(X_train).reshape(-1, 1) 
X_test = array(X_test).reshape(-1, 1) 

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

models = []

models.append(('Gaussian N-Bayes', GaussianNB()))
models.append(('Support Vector Machine', SVC(gamma='auto')))
models.append(('Random Forest', RandomForestClassifier(n_estimators=15)))
models.append(('K Nearest Neighbors', KNeighborsClassifier(n_neighbors=20)))
models.append(('Decision Tree', DecisionTreeClassifier()))

In [4]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

import joblib
csv_file = open("Analysis.csv","w")
csv_file.write("Name" + "," + 
               "Accuracy"+ "," +
               "Misclassification Rate" + "," + 
               "f1_score" + "," + 
               "Precision" + "," + 
               "Recall" + "\n")

for name, model in models:
    print(name)
    
    model.fit(X_train,Y_train)
    print("Fitting Done")
    
    filename = "_".join(name.split(" "))+'.sav'
    joblib.dump(model, open(filename, 'wb'))
    
    Y_pred = model.predict(X_test)
    csv_file.write(name + "," +
                   str(accuracy_score(Y_test, Y_pred)) + "," +
                   str(1 - accuracy_score(Y_test, Y_pred)) + "," +
                   str(f1_score(Y_test, Y_pred, average="macro")) + "," +
                   str(precision_score(Y_test, Y_pred, average="macro")) + "," +
                   str(recall_score(Y_test, Y_pred, average="macro")) + "\n")
    
    print("Accuracy:",accuracy_score(Y_test, Y_pred))
    print("Misclassification Rate:",1 - accuracy_score(Y_test, Y_pred))
    print("f1_score:",f1_score(Y_test, Y_pred, average="macro"))
    print("Precision:",precision_score(Y_test, Y_pred, average="macro"))
    print("Recall:",recall_score(Y_test, Y_pred, average="macro"))
    CM = confusion_matrix(Y_test, Y_pred)
    print(CM)
    print("Sensitivity of F = ", end = '')
    print(CM[0,0]/sum((CM.transpose())[0]))
    print("Sensitivity of M = ", end = '')
    print(CM[1,1]/sum((CM.transpose())[1]))
    print("Sensitivity of N = ", end = '')
    print(CM[2,2]/sum((CM.transpose())[2]))
    print("\n")

Gaussian N-Bayes
f1_score: 0.7988267770876467
After 10 fold cross validation:
>>>Accuracy: 0.74 (+/- 0.20)
>>>Misclassification Rate: 0.26 (+/- 0.20)
>>>F1_score: 0.19 (+/- 0.13)
>>>Precision: 0.19 (+/- 0.12)
>>>Recall: 0.16 (+/- 0.13)
>>>Confusion Matrix:
[[26  6  0]
 [16 12  0]
 [ 0  0 30]]
>>>Sensitiviy: 
   Sensitivity of F = 0.6190476190476191
   Sensitivity of M = 0.6666666666666666
   Sensitivity of N = 1.0


Support Vector Machine
f1_score: 0.7346179851250844
After 10 fold cross validation:
>>>Accuracy: 0.79 (+/- 0.12)
>>>Misclassification Rate: 0.21 (+/- 0.12)
>>>F1_score: 0.21 (+/- 0.13)
>>>Precision: 0.18 (+/- 0.13)
>>>Recall: 0.22 (+/- 0.14)
>>>Confusion Matrix:
[[24  8  0]
 [16 17  0]
 [ 0  0 25]]
>>>Sensitiviy: 
   Sensitivity of F = 0.6
   Sensitivity of M = 0.68
   Sensitivity of N = 1.0


Random Forest
f1_score: 0.8629437443373692
After 10 fold cross validation:
>>>Accuracy: 0.77 (+/- 0.17)
>>>Misclassification Rate: 0.23 (+/- 0.17)
>>>F1_score: 0.21 (+/- 0.15)
>>>Prec