In [2]:
import numpy as np
import h5py
import scipy.io


In [3]:
# Loads KRAKEN data and returns, training, hold out validation, and test
def load_dataset(idx, shuffle, Ntrain):
    filepath = "files_116/files_VLA_rand_"+ str(idx) +"_116.mat"
    
    mat = scipy.io.loadmat(filepath)
    signal_train=np.array([], dtype=np.float)
  
    # Training data
    args = (mat['p_cl_n'], mat['p_si_n'], mat['p_sa_n'],mat['p_gr_n'])
    for v in args: 
        tmp=np.vstack([np.real(v), np.imag(v)]) 
        signal_train=np.hstack([signal_train, tmp]) if signal_train.size else tmp

    mats=[1500,  1575, 1650, 1800]
    labels_train=np.array([mats[int(np.floor(i/1000))] for i in range(4000)], dtype=np.float)
    
    # Test data
    signal_test=np.array([],dtype=np.float)
    labels_test=np.array([], dtype=np.float)
    
    for j in range(10):
        i=j+1
        args=(mat["p_cl_n"+str(i)], mat['p_si_n'+str(i)], mat["p_sa_n"+str(i)], mat["p_gr_n"+str(i)])
        for v in args: 
            tmp=np.vstack([np.real(v), np.imag(v)])
            signal_test=np.hstack([signal_test, tmp]) if signal_test.size else tmp

        labels_test=np.real(np.append([labels_test], [labels_train]))

    # Test labels are perturbed, associate them with the correct material type
    Y=labels_test
    labels=labels_test
    
    X = signal_train.transpose()
    y = labels_train
    X_test = signal_test.transpose()
    y_test = Y
    
    X = signal_train.transpose()
    y = labels_train
    X_test = signal_test.transpose()
    y_test = labels_test
    
   
    ind=range(len(y))
    ind_train=ind[1:Ntrain]
    ind_val=ind[Ntrain:len(y)]
    ind_test = range(len(y_test))

  
    if shuffle:
        ind1 = np.random.permutation(len(y))
        ind_train=ind1[0:Ntrain]
        ind_val=ind1[Ntrain:len(y)]
        ind_test = np.random.permutation(len(y_test))
    
    X_train = np.array(X[ind_train,:], dtype = np.float)
    y_train = np.array(y[ind_train],)   
    X_val = np.array(X[ind_val,:], dtype = np.float)
    y_val = np.array(y[ind_val],) 
    X_test = np.array(X_test[ind_test,:], dtype = np.float)
    y_test = np.array(y_test[ind_test],)  
    return X_train, y_train, X_val, y_val, X_test, y_test



In [4]:
# Plot the training and test data
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm


X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(15,False, 3200)

f, ax = plt.subplots(2, 2, sharey=False, sharex=False)
ax[0][0].plot(X_train[:,4], color='orange')
ax[0][1].plot(y_train, color='yellow')
ax[1][0].plot(X_test[0:4000,20], color='green')
ax[1][1].plot(y_test, color='blue')

[<matplotlib.lines.Line2D at 0x130ed9d30>]

In [7]:
from time import time
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Benchmark metrics for each classifier
# Adapted from Peter Prettenhofer, et. al,
# https://scikit-learn.org/0.19/auto_examples/text/document_classification_20newsgroups.html
def benchmark(clf):
    target_names=[]
    targets=np.unique(y_train);

    for i in range(len(targets)):
        target_names.append(np.str(targets[i]))
    print('_' * 80)
    print("Training: ")
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)
    
    results=clf.cv_results_
    candidates = np.flatnonzero(results['rank_test_score'] == 1)
    for candidate in candidates:
        print("Model with rank: {0}".format(1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})"
              .format(results['mean_test_score'][candidate],
                      results['std_test_score'][candidate]))
        print("Parameters: {0}".format(results['params'][candidate]))
        print("\n")
    print("Validation: ")
    pred=clf.predict(X_val)
    test_time = time() - t0
    #print(metrics.confusion_matrix(y_val, pred))
    #print(metrics.classification_report(y_val, pred, target_names=target_names))
    score = metrics.accuracy_score(y_val, pred)
    print("val score:   %0.3f" % score)
    
    print("Testing: ")
    t0 = time()
    pred = clf.predict(X_test)
    print("test time:  %0.3fs" % test_time)
    
    score = metrics.accuracy_score(y_test, pred)
    print("accuracy:   %0.3f" % score)
    print("classification report:")
    #print(metrics.classification_report(y_test, pred, target_names=target_names))
    print("confusion matrix:")
    print(metrics.confusion_matrix(y_test, pred))

    clf_descr = str(str(clf.estimator))
    clf_rep = str(metrics.classification_report(y_test, pred, target_names=target_names))
    clf_cm = str(metrics.confusion_matrix(y_test, pred))
    return clf_descr, score, train_time, test_time, clf_rep, clf_cm
      

In [8]:
from scipy.stats import randint as sp_randint
from scipy.stats import uniform, expon
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis


# list of (estimator, param_dist), where param_dist is used  in RandomizedSearchCV
classifiers = [
#      (GaussianProcessClassifier(warm_start=True, n_jobs=-1, random_state=42),{
#         'multi_class': ["one_vs_rest", "one_vs_one"],
#         'n_restarts_optimizer':[0, 1]
#      }),
     (KNeighborsClassifier(), {
         'n_neighbors': sp_randint(8, 10),
         'weights': ['uniform', 'distance'],
         'algorithm':['ball_tree', 'kd_tree'],
     }),
    (NearestCentroid(), {
         'metric': ['euclidean', 'manhattan','minkowski','chebyshev'],
         'shrink_threshold': [None, .001, .0001, .1, .01]
     }),
    (SVC(kernel='linear'), {
        'C': np.logspace(-1, 3, 100),
        'tol': np.logspace(-8, -2, 100)
    }),
    (SVC(random_state=42, gamma='scale'), {
        'C': [.1, 1, 10, 100, 1000],
        'kernel': ['rbf', 'poly', 'linear', 'sigmoid'], 
        'class_weight':['balanced', None],
        'tol': np.logspace(-8, -2, 100)
    }),
    (MLPClassifier(max_iter=100000), {
        'hidden_layer_sizes': [(200), (50), (100,)],
        'alpha': expon(scale=.1),
        'learning_rate': ['constant','adaptive'],
    }),
    (DecisionTreeClassifier(),{
        'criterion': ['gini', 'entropy'],
        'splitter': ['best','random'],
        'max_features':[None, 'auto', 'sqrt', 'log2'],
    }),
    (RandomForestClassifier(max_depth=5, n_jobs=-1), {
        "max_depth": sp_randint(2, 20),
        "n_estimators": sp_randint(2, 50),
        "max_features": ['auto', 'log2'],
        'class_weight': ['balanced', 'balanced_subsample'],
        'criterion': ['gini', 'entropy']
    }),
     (GaussianNB(), {
         'var_smoothing': np.logspace(-12, -8, 100)
     }), 
     (LinearDiscriminantAnalysis(n_components=2), {
         'solver': ['svd', 'lsqr'],
         'tol': np.logspace(-10, -2, 100)
     }),
     (LogisticRegression(solver='newton-cg', random_state=0, max_iter=100000), {
        'C': [.1, 1, 10, 100, 1000],
         'multi_class': ["auto", "ovr","multinomial"],
         'solver': ['sag', 'saga', 'newton-cg'],
         'tol': np.logspace(-6, -4, 100)
     }),
]
names = [e.__class__.__name__ for e, g in classifiers]

In [9]:
# tests the classifiers without hyperparameter search
X_train, y_train,X_val, y_val, X_test, y_test= load_dataset(15, True, 3999)
X_val=X_train
y_val=y_train
target_names=[]
targets=np.unique(y_train);
for i in range(len(targets)):
        target_names.append(np.str(targets[i]))
        

j=1
(clf, param_grid)=classifiers[j]
name=names[j]
print(name)
clf.fit(X_train, y_train)
pred=clf.predict(X_val)
print(metrics.confusion_matrix(y_val, pred))
#print(metrics.classification_report(y_val, pred, target_names=target_names))
score = metrics.accuracy_score(y_val, pred)
print("val score:   %0.3f" % score)
pred=clf.predict(X_test)
print(metrics.confusion_matrix(y_test, pred))
#print(metrics.classification_report(y_test, pred, target_names=target_names))
score = metrics.accuracy_score(y_test, pred)
print("test score:   %0.3f" % score)

NearestCentroid
[[ 995    5    0    0]
 [   6  994    0    0]
 [   0    0 1000    0]
 [   0    0    0  999]]
val score:   0.997
[[ 2562  7406    31     1]
 [   11  9935    54     0]
 [    0    71  9929     0]
 [    0     0     0 10000]]
test score:   0.811


In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split


data_name=['KRAKEN 2 layer']
dset_name=data_name

noiselevel = np.array([15, 17 ,19, 21, 26, 30, 32 ,33, 60])
grid_searches = [dict() for x in range(len(noiselevel))]
results = [[] for x in range(len(noiselevel))]
ds_cnt=-1
for nl in noiselevel:
    ds_cnt=ds_cnt+1
    X_train, y_train,X_val, y_val, X_test, y_test= load_dataset(nl, True, 3200)
    
    print('\n Noise level:', str(nl),'\n')
   
    # iterate over classifiers
    for est_idx, (name, (estimator, param_grid)) in \
        enumerate(zip(names, classifiers)):

        # Perform randomized grid search over possible hyperparameters
        clf = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, n_iter=20, cv=5,
                                 verbose=0, n_jobs=-1)
        
        with ignore_warnings(category=ConvergenceWarning):
            results[ds_cnt].append(benchmark(clf))
        grid_searches[ds_cnt][name] = clf

        # Make predictions for the four test cases
        print(name)
        
        #print('Test Score', str(clf.score(X_test, y_test)))
      
        
    


 Noise level: 15 

________________________________________________________________________________
Training: 
train time: 1.905s
Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'kd_tree', 'n_neighbors': 9, 'weights': 'uniform'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'kd_tree', 'n_neighbors': 9, 'weights': 'uniform'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'ball_tree', 'n_neighbors': 9, 'weights': 'distance'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'ball_tree', 'n_neighbors': 9, 'weights': 'distance'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'kd_tree', 'n_neighbors': 9, 'weights': 'uniform'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.002)
Parameters: {'algorithm': 'ball_tree', 'n_neighbors': 9, 'weights': 'distance'}


Model with rank: 1
M



train time: 217.971s
Model with rank: 1
Mean validation score: 0.995 (std: 0.003)
Parameters: {'alpha': 0.015046192029279143, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant'}


Validation: 
val score:   0.999
Testing: 
test time:  217.976s
accuracy:   0.798
classification report:
confusion matrix:
[[ 2331  7650    18     1]
 [   43  9616   341     0]
 [    0    13  9987     0]
 [    0     0     0 10000]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.301s
Model with rank: 1
Mean validation score: 0.951 (std: 0.008)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.961
Testing: 
test time:  0.302s
accuracy:   0.787
classification report:
confusion matrix:
[[3329 5533 1128   10]
 [ 346 8919  723   12]
 [  50  624 9286   40]
 [   3   51   12 9934]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 11.416s
Model with rank: 1
Mean validation score: 0.991 (std: 0.004)
Parameters: {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 19, 'max_features': 'log2', 'n_estimators': 32}


Model with rank: 1
Mean validation score: 0.991 (std: 0.003)
Parameters: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 19, 'max_features': 'log2', 'n_estimators': 45}


Validation: 
val score:   0.995
Testing: 
test time:  11.522s
accuracy:   0.821
classification report:
confusion mat

test time:  0.841s
accuracy:   0.821
classification report:
confusion matrix:
[[ 3160  6769    70     1]
 [   63  9846    91     0]
 [    0   175  9825     0]
 [    0     0     0 10000]]
KNeighborsClassifier
________________________________________________________________________________
Training: 
train time: 0.199s
Model with rank: 1
Mean validation score: 0.996 (std: 0.003)
Parameters: {'shrink_threshold': None, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.003)
Parameters: {'shrink_threshold': 0.001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.003)
Parameters: {'shrink_threshold': 0.0001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.003)
Parameters: {'shrink_threshold': 0.1, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.996 (std: 0.003)
Parameters: {'shrink_threshold': 0.01, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.996 (std: 



train time: 423.173s
Model with rank: 1
Mean validation score: 0.995 (std: 0.003)
Parameters: {'alpha': 0.0015408974605549332, 'hidden_layer_sizes': 200, 'learning_rate': 'adaptive'}


Validation: 
val score:   0.994
Testing: 
test time:  423.182s
accuracy:   0.809
classification report:
confusion matrix:
[[ 2591  7311    87    11]
 [   15  9853   132     0]
 [    0    97  9903     0]
 [    0     0     0 10000]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.308s
Model with rank: 1
Mean validation score: 0.919 (std: 0.008)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'gini'}


Validation: 
val score:   0.927
Testing: 
test time:  0.309s
accuracy:   0.756
classification report:
confusion matrix:
[[2926 5817 1076  181]
 [ 519 8531  924   26]
 [  39  856 9066   39]
 [  14  140  114 9732]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 12.844s
Model with rank: 1
Mean validation score: 0.986 (std: 0.004)
Parameters: {'class_weight': 'balanced_subsample', 'criterion': 'entropy', 'max_depth': 18, 'max_features': 'auto', 'n_estimators': 38}


Validation: 
val score:   0.983
Testing: 
test time:  12.959s
accuracy:   0.814
classification report:
confusion matrix:
[[3202 6628  160   10]
 [ 123 9647  230    0]
 [   1  276 9723    0]
 [   0    2    0 9998]]
RandomForestClassifier
__________________________________________________________

train time: 0.207s
Model with rank: 1
Mean validation score: 0.990 (std: 0.004)
Parameters: {'shrink_threshold': 0.01, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.990 (std: 0.004)
Parameters: {'shrink_threshold': 0.01, 'metric': 'minkowski'}


Validation: 
val score:   0.993
Testing: 
test time:  0.209s
accuracy:   0.806
classification report:
confusion matrix:
[[ 2643  7239   117     1]
 [   67  9761   172     0]
 [    0   160  9840     0]
 [    0     0     0 10000]]
NearestCentroid
________________________________________________________________________________
Training: 
train time: 2.630s
Model with rank: 1
Mean validation score: 0.991 (std: 0.004)
Parameters: {'tol': 2.1544346900318867e-05, 'C': 1000.0}


Validation: 
val score:   0.993
Testing: 
test time:  2.690s
accuracy:   0.807
classification report:
confusion matrix:
[[ 2677  7207   115     1]
 [   68  9760   172     0]
 [    0   155  9845     0]
 [    0     0     0 10000]]
SVC
______________________



train time: 339.203s
Model with rank: 1
Mean validation score: 0.990 (std: 0.003)
Parameters: {'alpha': 0.0075399386579231426, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive'}


Validation: 
val score:   0.989
Testing: 
test time:  339.209s
accuracy:   0.817
classification report:
confusion matrix:
[[ 3138  6791    60    11]
 [  112  9754   134     0]
 [    0   195  9805     0]
 [    0     0     0 10000]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.317s
Model with rank: 1
Mean validation score: 0.905 (std: 0.008)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.897
Testing: 
test time:  0.318s
accuracy:   0.737
classification report:
confusion matrix:
[[2738 5796 1385   81]
 [ 613 8185 1155   47]
 [  32 1077 8826   65]
 [   6  157  104 9733]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 5.710s
Model with rank: 1
Mean validation score: 0.976 (std: 0.003)
Parameters: {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 13, 'max_features': 'log2', 'n_estimators': 30}


Validation: 
val score:   0.968
Testing: 
test time:  5.815s
accuracy:   0.801
classification report:
confusion matrix:
[[3058 6703  224   15]
 [ 246 9402  351    1]
 [   2  389 9608    1]
 [   0    9    2 9989]]
RandomForestClassifier
___________________________________________________________________

train time: 2.774s
Model with rank: 1
Mean validation score: 0.757 (std: 0.002)
Parameters: {'tol': 7.054802310718646e-08, 'C': 432.87612810830615}


Validation: 
val score:   0.802
Testing: 
test time:  2.844s
accuracy:   0.815
classification report:
confusion matrix:
[[9046  602  350    2]
 [5849 3690  461    0]
 [  49   76 9875    0]
 [   0    1    0 9999]]
SVC
________________________________________________________________________________
Training: 
train time: 1.841s
Model with rank: 1
Mean validation score: 0.983 (std: 0.003)
Parameters: {'tol': 1.519911082952933e-06, 'kernel': 'linear', 'class_weight': None, 'C': 1000}


Validation: 
val score:   0.985
Testing: 
test time:  1.905s
accuracy:   0.812
classification report:
confusion matrix:
[[3143 6652  203    2]
 [ 160 9577  263    0]
 [   1  226 9773    0]
 [   0    1    0 9999]]
SVC
________________________________________________________________________________
Training: 




train time: 238.427s
Model with rank: 1
Mean validation score: 0.982 (std: 0.003)
Parameters: {'alpha': 0.0033628744587802964, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive'}


Validation: 
val score:   0.988
Testing: 
test time:  238.432s
accuracy:   0.799
classification report:
confusion matrix:
[[ 2543  7233   208    16]
 [   91  9583   325     1]
 [    0   182  9818     0]
 [    0     0     0 10000]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.334s
Model with rank: 1
Mean validation score: 0.870 (std: 0.018)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.868
Testing: 
test time:  0.336s
accuracy:   0.736
classification report:
confusion matrix:
[[3359 5606  868  167]
 [ 924 7654 1365   57]
 [  86 1188 8636   90]
 [  27  163   36 9774]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 12.928s
Model with rank: 1
Mean validation score: 0.960 (std: 0.007)
Parameters: {'class_weight': 'balanced_subsample', 'criterion': 'entropy', 'max_depth': 12, 'max_features': 'auto', 'n_estimators': 46}


Validation: 
val score:   0.961
Testing: 
test time:  13.037s
accuracy:   0.795
classification report:
confusion matrix:
[[3211 6465  271   53]
 [ 403 9096  489   12]
 [   3  458 9527   12]
 [   1   22    2 9975]]
RandomForestClassifier
_______________________________________________________

train time: 0.207s
Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': None, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.0001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.01, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': None, 'metric': 'minkowski'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.001, 'metric': 'minkowski'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.0001, 'metric': 'minkowski'}


Model with rank: 1
Mean validation score: 0.960 (std: 0.009)
Parameters: {'shrink_threshold': 0.01, 



train time: 0.344s
Model with rank: 1
Mean validation score: 0.804 (std: 0.006)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.809
Testing: 
test time:  0.345s
accuracy:   0.691
classification report:
confusion matrix:
[[3612 4766 1237  385]
 [1452 6449 1886  213]
 [ 158 1497 8072  273]
 [  99  249  155 9497]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 12.303s
Model with rank: 1
Mean validation score: 0.923 (std: 0.014)
Parameters: {'class_weight': 'balanced_subsample', 'criterion': 'entropy', 'max_depth': 14, 'max_features': 'auto', 'n_estimators': 45}


Validation: 
val score:   0.926
Testing: 
test time:  12.418s
accuracy:   0.772
classification report:
confusion matrix:
[[3522 5857  480  141]
 [ 786 8287  869   58]
 [  56  768 9133   43]
 [   2   52   14 9932]]
RandomForestClassifier
_______________________________________________________

train time: 0.204s
Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': None, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': 0.001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': 0.0001, 'metric': 'euclidean'}


Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': None, 'metric': 'minkowski'}


Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': 0.001, 'metric': 'minkowski'}


Model with rank: 1
Mean validation score: 0.929 (std: 0.010)
Parameters: {'shrink_threshold': 0.0001, 'metric': 'minkowski'}


Validation: 
val score:   0.944
Testing: 
test time:  0.207s
accuracy:   0.784
classification report:
confusion matrix:
[[3432 5884  628   56]
 [ 608 8668  719    5]
 [  36  686 9270    8]
 [   3   12    9 9976]]
NearestCentr



train time: 466.066s
Model with rank: 1
Mean validation score: 0.933 (std: 0.015)
Parameters: {'alpha': 0.005732964618329418, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive'}


Validation: 
val score:   0.941
Testing: 
test time:  466.072s
accuracy:   0.783
classification report:
confusion matrix:
[[3405 5974  497  124]
 [ 560 8668  760   12]
 [  25  699 9265   11]
 [   4    9   12 9975]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.347s
Model with rank: 1
Mean validation score: 0.740 (std: 0.014)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'gini'}


Validation: 
val score:   0.749
Testing: 
test time:  0.348s
accuracy:   0.643
classification report:
confusion matrix:
[[3483 4402 1583  532]
 [1775 5805 2122  298]
 [ 338 2009 7339  314]
 [ 165  402  359 9074]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 10.215s
Model with rank: 1
Mean validation score: 0.883 (std: 0.013)
Parameters: {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_features': 'log2', 'n_estimators': 46}


Validation: 
val score:   0.901
Testing: 
test time:  10.324s
accuracy:   0.751
classification report:
confusion matrix:
[[3681 5307  787  225]
 [1063 7537 1294  106]
 [  87  897 8930   86]
 [   6   63   36 9895]]
RandomForestClassifier
_____________________________________________________________________

train time: 2.620s
Model with rank: 1
Mean validation score: 0.928 (std: 0.008)
Parameters: {'tol': 0.0014174741629268048, 'C': 830.2175681319752}


Validation: 
val score:   0.915
Testing: 
test time:  2.682s
accuracy:   0.784
classification report:
confusion matrix:
[[4124 4835  979   62]
 [ 862 7928 1201    9]
 [  68  552 9369   11]
 [   7   18   22 9953]]
SVC
________________________________________________________________________________
Training: 
train time: 1.645s
Model with rank: 1
Mean validation score: 0.928 (std: 0.008)
Parameters: {'tol': 1.3219411484660288e-06, 'kernel': 'rbf', 'class_weight': 'balanced', 'C': 0.1}


Validation: 
val score:   0.917
Testing: 
test time:  1.712s
accuracy:   0.780
classification report:
confusion matrix:
[[3651 5524  755   70]
 [ 674 8390  925   11]
 [  53  720 9217   10]
 [   6   19   18 9957]]
SVC
________________________________________________________________________________
Training: 




train time: 304.380s
Model with rank: 1
Mean validation score: 0.928 (std: 0.009)
Parameters: {'alpha': 0.0054729423161818435, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant'}


Validation: 
val score:   0.919
Testing: 
test time:  304.385s
accuracy:   0.777
classification report:
confusion matrix:
[[3558 5508  780  154]
 [ 651 8301 1027   21]
 [  45  685 9252   18]
 [   6   14   27 9953]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.368s
Model with rank: 1
Mean validation score: 0.745 (std: 0.014)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.708
Testing: 
test time:  0.369s
accuracy:   0.652
classification report:
confusion matrix:
[[3611 4217 1535  637]
 [1691 5969 2024  316]
 [ 405 1792 7502  301]
 [ 126  484  398 8992]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 9.665s
Model with rank: 1
Mean validation score: 0.882 (std: 0.013)
Parameters: {'class_weight': 'balanced_subsample', 'criterion': 'entropy', 'max_depth': 14, 'max_features': 'log2', 'n_estimators': 48}


Validation: 
val score:   0.846
Testing: 
test time:  9.779s
accuracy:   0.762
classification report:
confusion matrix:
[[4235 4474 1026  265]
 [1132 7312 1426  130]
 [ 144  725 9050   81]
 [  20   55   61 9864]]
RandomForestClassifier
_________________________________________________________

train time: 2.685s
Model with rank: 1
Mean validation score: 0.909 (std: 0.009)
Parameters: {'tol': 1.1497569953977357e-06, 'C': 756.463327554629}


Validation: 
val score:   0.920
Testing: 
test time:  2.748s
accuracy:   0.771
classification report:
confusion matrix:
[[3596 5604  727   73]
 [ 844 8323  819   14]
 [  65  935 8980   20]
 [   8   42   20 9930]]
SVC
________________________________________________________________________________
Training: 
train time: 1.696s
Model with rank: 1
Mean validation score: 0.911 (std: 0.009)
Parameters: {'tol': 2.1544346900318867e-07, 'kernel': 'sigmoid', 'class_weight': 'balanced', 'C': 0.1}


Validation: 
val score:   0.915
Testing: 
test time:  1.759s
accuracy:   0.765
classification report:
confusion matrix:
[[3413 5658  835   94]
 [ 844 8088 1045   23]
 [  61  744 9170   25]
 [   7   32   21 9940]]
SVC
________________________________________________________________________________
Training: 




train time: 252.496s
Model with rank: 1
Mean validation score: 0.908 (std: 0.012)
Parameters: {'alpha': 0.005371506098479411, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive'}


Validation: 
val score:   0.917
Testing: 
test time:  252.501s
accuracy:   0.762
classification report:
confusion matrix:
[[3155 6082  630  133]
 [ 701 8325  951   23]
 [  36  882 9056   26]
 [   9   23   24 9944]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.372s
Model with rank: 1
Mean validation score: 0.700 (std: 0.016)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


Validation: 
val score:   0.703
Testing: 
test time:  0.373s
accuracy:   0.616
classification report:
confusion matrix:
[[3479 4044 1937  540]
 [2130 5276 2232  362]
 [ 468 2087 7052  393]
 [ 159  617  396 8828]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 6.988s
Model with rank: 1
Mean validation score: 0.858 (std: 0.018)
Parameters: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 14, 'max_features': 'log2', 'n_estimators': 48}


Validation: 
val score:   0.869
Testing: 
test time:  7.093s
accuracy:   0.736
classification report:
confusion matrix:
[[3739 4951 1042  268]
 [1329 7100 1412  159]
 [ 151  971 8738  140]
 [  16   97   37 9850]]
RandomForestClassifier
______________________________________________________________________

train time: 2.698s
Model with rank: 1
Mean validation score: 0.733 (std: 0.010)
Parameters: {'tol': 2.477076355991714e-07, 'C': 911.1627561154896}


Validation: 
val score:   0.698
Testing: 
test time:  2.761s
accuracy:   0.633
classification report:
confusion matrix:
[[3749 3843 1678  730]
 [2143 5489 1918  450]
 [ 651 1758 7115  476]
 [ 255  435  348 8962]]
SVC
________________________________________________________________________________
Training: 
train time: 2.814s
Model with rank: 1
Mean validation score: 0.738 (std: 0.014)
Parameters: {'tol': 0.0012328467394420659, 'kernel': 'rbf', 'class_weight': 'balanced', 'C': 0.1}


Model with rank: 1
Mean validation score: 0.738 (std: 0.014)
Parameters: {'tol': 2.477076355991714e-07, 'kernel': 'rbf', 'class_weight': 'balanced', 'C': 0.1}


Validation: 
val score:   0.699
Testing: 
test time:  2.907s
accuracy:   0.633
classification report:
confusion matrix:
[[3685 3839 1700  776]
 [2109 5502 1918  471]
 [ 628 1770 7131  471]
 [ 238  413 



train time: 214.072s
Model with rank: 1
Mean validation score: 0.733 (std: 0.011)
Parameters: {'alpha': 0.004912250531199668, 'hidden_layer_sizes': 200, 'learning_rate': 'constant'}


Validation: 
val score:   0.694
Testing: 
test time:  214.074s
accuracy:   0.631
classification report:
confusion matrix:
[[4133 3365 1779  723]
 [2506 4919 2132  443]
 [ 746 1540 7284  430]
 [ 312  402  398 8888]]
MLPClassifier
________________________________________________________________________________
Training: 




train time: 0.366s
Model with rank: 1
Mean validation score: 0.487 (std: 0.016)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'gini'}


Validation: 
val score:   0.466
Testing: 
test time:  0.367s
accuracy:   0.441
classification report:
confusion matrix:
[[3117 3151 2312 1420]
 [2482 3551 2667 1300]
 [1464 2612 4495 1429]
 [ 881 1479 1167 6473]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
train time: 12.523s
Model with rank: 1
Mean validation score: 0.655 (std: 0.011)
Parameters: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 14, 'max_features': 'auto', 'n_estimators': 41}


Validation: 
val score:   0.636
Testing: 
test time:  12.629s
accuracy:   0.566
classification report:
confusion matrix:
[[3751 3296 1819 1134]
 [2589 4033 2495  883]
 [ 906 1772 6423  899]
 [ 431  616  538 8415]]
RandomForestClassifier
_______________________________________________________________________

In [13]:
import datetime
# Displaying results to paste into latex
for nc in range(len(names)):
    print("\\begin{filecontents}{"+names[nc]+"-kraken.dat}\n noise 	snr accuracy") 
    for nl in range(len(noiselevel)):
        clf_descr, score, train_time, test_time, clf_rep, clf_cm=results[nl][nc]
    
        print(str(noiselevel[nl])+" "+ str(18-nl)+" "+ str(score*100))
    print("\end{filecontents}")
    

\begin{filecontents}{KNeighborsClassifier-kraken.dat}
 noise 	snr accuracy
15 18 81.75
17 17 82.0775
19 16 81.8925
21 15 81.5025
26 14 78.6125
30 13 75.84
32 12 78.0025
33 11 73.91
60 10 55.6775
\end{filecontents}
\begin{filecontents}{NearestCentroid-kraken.dat}
 noise 	snr accuracy
15 18 81.04249999999999
17 17 81.13749999999999
19 16 80.61
21 15 80.8275
26 14 79.72749999999999
30 13 78.365
32 12 78.11749999999999
33 11 76.99249999999999
60 10 63.417500000000004
\end{filecontents}
\begin{filecontents}{SVC-kraken.dat}
 noise 	snr accuracy
15 18 84.93
17 17 77.33
19 16 80.705
21 15 81.525
26 14 79.59
30 13 77.07000000000001
32 12 78.435
33 11 77.0725
60 10 63.287499999999994
\end{filecontents}
\begin{filecontents}{SVC-kraken.dat}
 noise 	snr accuracy
15 18 80.975
17 17 80.925
19 16 80.25750000000001
21 15 81.23
26 14 79.5775
30 13 78.2925
32 12 78.03750000000001
33 11 76.5275
60 10 63.2575
\end{filecontents}
\begin{filecontents}{MLPClassifier-kraken.dat}
 noise 	snr accuracy
15 18 79.83

In [None]:
# Displaying results in a pandas dataframe
#Code adapted from https://www.kaggle.com/grfiv4/displaying-the-results-of-a-grid-search
import pandas as pd
def score_summary(grid_searches, sort_by='mean_test_score'):
        frames = []
        for name, grid_search in grid_searches.items():
            frame = pd.DataFrame(grid_search.cv_results_)
            frame = frame.filter(regex='^(?!.*param_).*$')
            frame['estimator'] = len(frame)*[name]
            frames.append(frame)
        df = pd.concat(frames)
        
        df = df.sort_values([sort_by], ascending=False)
        df = df.reset_index()
        df = df.drop(['rank_test_score', 'index'], 1)
        
        columns = df.columns.tolist()
        columns.remove('estimator')
        columns = ['estimator']+columns
        df = df[columns]
        return df

In [None]:
df=score_summary(grid_searches[0])
print(noiselevel[0])
df