Libraries

In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score,roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier

Load data

In [2]:
s25 = np.load('s25.npy')
h25 = np.load('h25.npy')

In [3]:
s25_labels = np.array([0 for _ in range(0,len(s25))])
h25_labels = np.array([1 for _ in range(0,len(h25))])

In [4]:
X = np.append(s25,h25,axis=0)

In [5]:
y = np.append(s25_labels,h25_labels,axis=0)

In [6]:
def define_models(models=dict()):
    # nonlinear models
    models[ ' knn ' ] = KNeighborsClassifier(n_neighbors=7,n_jobs=-1)
    models[ ' cart ' ] = DecisionTreeClassifier()
    models[ ' svm ' ] = SVC()
    models[ ' bayes ' ] = GaussianNB()
    # ensemble models
    models[ ' bag ' ] = BaggingClassifier(n_estimators=100,n_jobs=-1)
    models[ ' rf ' ] = RandomForestClassifier(n_estimators=100,n_jobs=-1)
    models[ ' et ' ] = ExtraTreesClassifier(n_estimators=100,n_jobs=-1)
    print( ' Defined %d models ' % len(models))
    return models

In [7]:
def evaluate_model(trainX, trainy, testX, testy, model):
    # fit the model
    model.fit(trainX, trainy)
    # make predictions
    yhat = model.predict(testX)
    # evaluate predictions
    accuracy = accuracy_score(testy, yhat)* 100.0
    precision = precision_score(testy, yhat)* 100.0
    recall = recall_score(testy, yhat)* 100.0
    f1score = f1_score(testy, yhat)* 100.0
    auc = roc_auc_score(testy, yhat)* 100.0
    return [accuracy ,precision,recall,f1score, auc]

In [8]:
def evaluate_models(X,y, models):
    
    results = dict()
    for i in range(0,5):
        print('Iteration ' + str(i+1))
        trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.20)
        trainX = trainX.reshape((trainX.shape[0], trainX.shape[1] * trainX.shape[2]))
        testX = testX.reshape((testX.shape[0], testX.shape[1] * testX.shape[2]))
        for name, model in models.items():
            print(name)
            # evaluate the model
            try:
                results[name].append(evaluate_model(trainX, trainy, testX, testy, model))
            except:
                results[name] = [evaluate_model(trainX, trainy, testX, testy, model)]
    return results

In [9]:
def show_results(results):
    for key in results.keys():
        values = results[key]
        print(key+ ', mean: '+ str(np.mean(values,axis=0))+ ', std: ' + str(np.std(values,axis=0)))

In [10]:
# get model list
models = define_models()
# evaluate models
results = evaluate_models(X, y, models)

 Defined 7 models 
Iteration 1
 knn 
 cart 
 svm 
 bayes 
 bag 
 rf 
 et 
Iteration 2
 knn 
 cart 
 svm 


  _warn_prf(average, modifier, msg_start, len(result))


 bayes 
 bag 
 rf 
 et 
Iteration 3
 knn 
 cart 
 svm 
 bayes 
 bag 
 rf 
 et 
Iteration 4
 knn 
 cart 
 svm 


  _warn_prf(average, modifier, msg_start, len(result))


 bayes 
 bag 
 rf 
 et 
Iteration 5
 knn 
 cart 
 svm 


  _warn_prf(average, modifier, msg_start, len(result))


 bayes 
 bag 
 rf 
 et 


In [11]:
show_results(results)

 knn , mean: [57.03056769 52.12880064 99.80769231 68.44829803 59.5874109 ], std: [2.21978428 2.6618283  0.38461538 2.30708749 0.56443267]
 cart , mean: [64.19213974 62.49155113 59.5200608  60.75582515 63.94893769], std: [3.08780254 5.15388845 5.40391032 3.9767022  3.12447954]
 svm , mean: [54.14847162 20.77253554 32.57692308 25.31135531 54.10675611], std: [ 3.97355918 25.50254391 39.96181609 31.00649841  5.16769241]
 bayes , mean: [62.62008734 56.08451006 93.21012284 69.94770503 64.35370088], std: [2.52521088 2.76844839 4.60800443 2.45633318 2.30970309]
 bag , mean: [77.37991266 81.8036581  66.93312139 73.41940021 76.91359658], std: [3.23733677 2.56803125 6.13717025 3.45800439 2.90691349]
 rf , mean: [75.19650655 83.60499849 59.00738564 68.99506919 74.20695512], std: [2.19907045 4.22064931 3.62920608 1.65892952 1.43184519]
 et , mean: [76.24454148 80.64572258 64.96110461 71.90200926 75.57602759], std: [1.844429   3.37487153 2.10475505 1.68762181 1.52031375]
