In [1]:
import numpy as np
# Load the array stored in 40features.npy (path is relative to the working
# directory); it is used below as the sample-by-feature input to the models.
features = np.load(file="40features.npy")

In [2]:
# Binary targets: 100 zeros followed by 100 ones, as a flat integer vector.
# NOTE(review): this assumes the rows of `features` are ordered as 100
# class-0 samples and then 100 class-1 samples -- confirm against the data.
labels = np.repeat(np.array([0, 1], dtype=int), 100)

In [3]:
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [4]:
# Each model is declared as a (display name, estimator) pair so a label can
# never drift out of step with its classifier.  The pairs are then transposed
# into the two parallel lists `names` and `classifiers` used by the
# evaluation cells.
names, classifiers = (
    list(column)
    for column in zip(*[
        ("Logistic Regression", LogisticRegression(max_iter=300)),
        ("Decision Tree", DecisionTreeClassifier()),
        ("Random Forest", RandomForestClassifier(n_estimators=100)),
        ("SVM linear", SVC(C=100, kernel='linear')),
        ("SVM polynomial", SVC(C=100, kernel='poly')),
        ("SVM rbf", SVC(C=100, kernel='rbf')),
        ("Gaussian Naive Bayes", GaussianNB()),
    ])
)

In [6]:
# Compare all classifiers on repeated stratified hold-out splits that keep
# 30% of the samples for testing.
#
# Seed NumPy's global RNG before any model is fitted.
# NOTE(review): the tree-based estimators are built without `random_state`,
# so they presumably draw from this global stream -- confirm that this is the
# intended way to make the run repeatable.
np.random.seed(1)
# Number of random stratified train/test splits to evaluate.
tc = 5
sss = StratifiedShuffleSplit(n_splits=tc, test_size=.3, random_state=1)
for train_index, test_index in sss.split(features, labels):
    print('!--------------------!--------------------!--------------------!--------------------!--------------------!')
    print("Test using:", test_index)
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    for name, clf in zip(names, classifiers):
        print("----- Classifier: ", name)
        # Hold-out evaluation: fit on the training part, score on the test part.
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print("Accuracy and confusion matrix: {:.2f}".format(metrics.accuracy_score(y_test, y_pred)))
        print(metrics.confusion_matrix(y_test, y_pred))

        # k-fold cross-validation on the training part only (the held-out
        # samples are never passed in), first with 5 folds and then with 10.
        for n_folds in (5, 10):
            fold_accuracies = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=n_folds)
            print("{} fold cross-validation -> Accuracy: {:.2f}".format(n_folds, fold_accuracies.mean()))
        

!--------------------!--------------------!--------------------!--------------------!--------------------!
Test using: [ 50 185  96  68 184 106 145 177  22 190 123 143 130 113   9 120  72 175
  85 124  20 194   7  16 136   6 170 102  75   1  29 107 112  25  28  71
 191  37   5  14 132  12  61 152  79 110  18 176 172 193  76 157 168  11
  13  63 121 181  64  57]
----- Classifier:  Logistic Regression
Accuracy and confusion matrix: 0.65
[[22  8]
 [13 17]]
5 fold cross-validation -> Accuracy: 0.61
10 fold cross-validation -> Accuracy: 0.63
----- Classifier:  Decision Tree
Accuracy and confusion matrix: 0.58
[[16 14]
 [11 19]]
5 fold cross-validation -> Accuracy: 0.56
10 fold cross-validation -> Accuracy: 0.55
----- Classifier:  Random Forest
Accuracy and confusion matrix: 0.83
[[25  5]
 [ 5 25]]
5 fold cross-validation -> Accuracy: 0.78
10 fold cross-validation -> Accuracy: 0.69
----- Classifier:  SVM linear
Accuracy and confusion matrix: 0.67
[[22  8]
 [12 18]]
5 fold cross-validation ->

In [7]:
# Compare all classifiers on repeated stratified hold-out splits that keep
# 25% of the samples for testing.
#
# Seed NumPy's global RNG before any model is fitted.
# NOTE(review): the tree-based estimators are built without `random_state`,
# so they presumably draw from this global stream -- confirm that this is the
# intended way to make the run repeatable.
np.random.seed(1)
# Number of random stratified train/test splits to evaluate.
tc = 5
sss = StratifiedShuffleSplit(n_splits=tc, test_size=.25, random_state=1)
for train_index, test_index in sss.split(features, labels):
    print('!--------------------!--------------------!--------------------!--------------------!--------------------!')
    print("Test using:", test_index)
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    for name, clf in zip(names, classifiers):
        print("----- Classifier: ", name)
        # Hold-out evaluation: fit on the training part, score on the test part.
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print("Accuracy and confusion matrix: {:.2f}".format(metrics.accuracy_score(y_test, y_pred)))
        print(metrics.confusion_matrix(y_test, y_pred))

        # k-fold cross-validation on the training part only (the held-out
        # samples are never passed in), first with 5 folds and then with 10.
        for n_folds in (5, 10):
            fold_accuracies = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=n_folds)
            print("{} fold cross-validation -> Accuracy: {:.2f}".format(n_folds, fold_accuracies.mean()))

!--------------------!--------------------!--------------------!--------------------!--------------------!
Test using: [123  50 172   1 106 110 193 120 152  29  96  11 130 136 185  28  13 107
   5 102 176 113  85 191  18 143  76  71  14 177   9 170  64  75 145  16
  25  79   6 168  37 175  20 190 121 157  68  72  12 194]
----- Classifier:  Logistic Regression
Accuracy and confusion matrix: 0.70
[[20  5]
 [10 15]]
5 fold cross-validation -> Accuracy: 0.61
10 fold cross-validation -> Accuracy: 0.60
----- Classifier:  Decision Tree
Accuracy and confusion matrix: 0.54
[[11 14]
 [ 9 16]]
5 fold cross-validation -> Accuracy: 0.56
10 fold cross-validation -> Accuracy: 0.55
----- Classifier:  Random Forest
Accuracy and confusion matrix: 0.86
[[23  2]
 [ 5 20]]
5 fold cross-validation -> Accuracy: 0.74
10 fold cross-validation -> Accuracy: 0.75
----- Classifier:  SVM linear
Accuracy and confusion matrix: 0.70
[[20  5]
 [10 15]]
5 fold cross-validation -> Accuracy: 0.65
10 fold cross-validation 

In [8]:
# Compare all classifiers on repeated stratified hold-out splits that keep
# 20% of the samples for testing.
#
# Seed NumPy's global RNG before any model is fitted.
# NOTE(review): the tree-based estimators are built without `random_state`,
# so they presumably draw from this global stream -- confirm that this is the
# intended way to make the run repeatable.
np.random.seed(1)
# Number of random stratified train/test splits to evaluate.
tc = 5
sss = StratifiedShuffleSplit(n_splits=tc, test_size=.2, random_state=1)
for train_index, test_index in sss.split(features, labels):
    print('!--------------------!--------------------!--------------------!--------------------!--------------------!')
    print("Test using:", test_index)
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    for name, clf in zip(names, classifiers):
        print("----- Classifier: ", name)
        # Hold-out evaluation: fit on the training part, score on the test part.
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print("Accuracy and confusion matrix: {:.2f}".format(metrics.accuracy_score(y_test, y_pred)))
        print(metrics.confusion_matrix(y_test, y_pred))

        # k-fold cross-validation on the training part only (the held-out
        # samples are never passed in), first with 5 folds and then with 10.
        for n_folds in (5, 10):
            fold_accuracies = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=n_folds)
            print("{} fold cross-validation -> Accuracy: {:.2f}".format(n_folds, fold_accuracies.mean()))

!--------------------!--------------------!--------------------!--------------------!--------------------!
Test using: [143   1 106  75 185 194 152 176 130  50  29   6 191 121 136  25  28 120
  37 172  76 193  18 175  71 102   5  79  20 177 190  11  72 145 107   9
 170  16  12  64]
----- Classifier:  Logistic Regression
Accuracy and confusion matrix: 0.65
[[14  6]
 [ 8 12]]
5 fold cross-validation -> Accuracy: 0.60
10 fold cross-validation -> Accuracy: 0.64
----- Classifier:  Decision Tree
Accuracy and confusion matrix: 0.65
[[13  7]
 [ 7 13]]
5 fold cross-validation -> Accuracy: 0.60
10 fold cross-validation -> Accuracy: 0.59
----- Classifier:  Random Forest
Accuracy and confusion matrix: 0.80
[[17  3]
 [ 5 15]]
5 fold cross-validation -> Accuracy: 0.76
10 fold cross-validation -> Accuracy: 0.78
----- Classifier:  SVM linear
Accuracy and confusion matrix: 0.62
[[14  6]
 [ 9 11]]
5 fold cross-validation -> Accuracy: 0.61
10 fold cross-validation -> Accuracy: 0.64
----- Classifier:  SVM