# Evaluation of Classifier Performance

In [2]:
# general liberaries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# stats liberaries
from scipy import stats

# evaluation liberaries
from sklearn import model_selection
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# models
from sklearn import linear_model
from sklearn import naive_bayes
from sklearn import neighbors
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

%matplotlib inline

# load the data
loans = pd.read_csv('./data/loans.csv')

# setup
target = 'safe_loans'
features = loans.columns[loans.columns!=target]

x = loans[features]
y = loans[target]

# apply classifier methods
simple_logistic = linear_model.LogisticRegression(C=10e10, random_state=0)
simple_eval = model_selection.cross_val_score(simple_logistic, x, y, cv=10)
print("Simple Logistic Regression\t%4.3f\t%4.3f" % (np.average(simple_eval), np.std(simple_eval)))

nb = naive_bayes.GaussianNB();
nb_eval = model_selection.cross_val_score(nb, x, y, cv=10)
print("Naive Bayes\t%4.3f\t%4.3f" % (np.average(nb_eval), np.std(nb_eval)))

knn = neighbors.KNeighborsClassifier(20, weights='distance')
knn_eval = model_selection.cross_val_score(knn, x, y, cv=10)
print("k Nearest Neighbor\t%4.3f\t%4.3f" % (np.average(knn_eval), np.std(knn_eval)))

rf = RandomForestClassifier(n_estimators=40, max_depth=None, min_samples_split=2, random_state=0)
rf_eval = model_selection.cross_val_score(rf, x, y)
print("Random Forest\t%4.3f\t%4.3f" % (np.average(rf_eval), np.std(rf_eval)))

Simple Logistic Regression	0.808	0.009
Naive Bayes	0.770	0.017
k Nearest Neighbor	0.801	0.004
Random Forest	0.807	0.007


## Unpaired t-test

In [3]:
def PrintSignificance(stat, c):
    if (stat[1]<(1-c)):
        print("The difference is statistically significant (cf %3.2f)"%c)
    else:
        print("The difference is not statistically significant (cf %3.2f)"%c)
        
        
unpaired_lr_rf = stats.ttest_ind(simple_eval, rf_eval)
print("Logistic Regression vs Random Forests: p-value = %4.3f"%unpaired_lr_rf[1])
PrintSignificance(unpaired_lr_rf, 0.95)
print("\n")

unpaired_lr_nb = stats.ttest_ind(simple_eval, nb_eval)
print("Logistic Regression vs Naive Bayes: p-value = %4.3f"%unpaired_lr_nb[1])
PrintSignificance(unpaired_lr_nb, 0.95)
print("\n")

Logistic Regression vs Random Forests: p-value = 0.855
The difference is not statistically significant (cf 0.95)


Logistic Regression vs Naive Bayes: p-value = 0.000
The difference is statistically significant (cf 0.95)




## Paired t-test

In [None]:
# Apply the methods to the same training and test sets

tf = model_selection.KFold(n_splits=10, shuffle=True, random_state=123456)

knn_scores = []
rf_scores = []

tf.split(loans)
for train, test in tf.split(loans):
    l = loans.loc[train]
    train_x = l[features]
    train_y = l[target]
    
    l_test = loans.loc[test]
    test_x = l_test[features]
    test_y = l_test[target]
    
    knn = neighbors.KNeighborsClassifier(20, weights='distance')
    knn = knn.fit(train_x, train_y)
    acc_knn = accuracy_score(test_y, knn.predict(test_x))

    rf = RandomForestClassifier(n_estimators=40, max_depth=None, min_samples_split=2, random_state=0)
    rf = rf.fit(train_x, train_y)
    acc_rf = accuracy_score(test_y, rf.predict(test_x))
    
    knn_scores = knn_scores + [acc_knn]
    rf_scores = rf_scores + [acc_rf]
    
print("k Nearest Neighbor\t%4.3f\t%4.3f" % (np.average(knn_scores), np.std(knn_scores)))
print("Random Forest     \t%4.3f\t%4.3f" % (np.average(rf_scores), np.std(rf_scores)))

In [None]:
paired_test = stats.ttest_rel(knn_scores, rf_scores)
print("k-nn vs Random Forests: p-value = %4.3f"%paired_test[1])
PrintSignificance(paired_test, 0.95)