In [None]:
import numpy as np
import h5py
import scipy.io


In [None]:
# Loads KRAKEN data
def load_dataset(idx):
    filepath = "files_116/files_VLA_rand_"+ str(idx) +"_116.mat"
    
    mat = scipy.io.loadmat(filepath)
    signal_train=np.array([])
  
    # Training data
    args = (mat['p_cl_n'], mat['p_si_n'], mat['p_sa_n'],mat['p_gr_n'])
    for v in args: 
        tmp=np.vstack([np.real(v), np.imag(v)]) 
        signal_train=np.hstack([signal_train, tmp]) if signal_train.size else tmp
    
    
    labels_train=np.array([int(np.floor(i/1000)) for i in range(4000)])
    
    # Test data
    signal_test=np.array([])
    labels_test=[]
    
    for j in range(10):
        i=j+1
        args=(mat["p_cl_n"+str(i)], mat['p_si_n'+str(i)],
              mat["p_sa_n"+str(i)], mat["p_gr_n"+str(i)])
        for v in args: 
            tmp=np.vstack([np.real(v), np.imag(v)])
            signal_test=np.hstack([signal_test, tmp]) if signal_test.size else tmp

        labels_test=np.real(np.append([labels_test], [labels_train]))
    
    # Test labels are perturbed, associate them with the correct material type
    Y=labels_test
    labels=labels_test
    for i in range(len(labels)):
        if abs(labels[i]-1500)<20:
            Y[i]=0
        else:
            if abs(labels[i]-1575)<20:
                Y[i]=1
            else:
                if abs(labels[i]-1650)<20:
                    Y[i]=2
                else:
                    if abs(labels[i]-1800)<20:
                        Y[i]=3
    
    X_train = signal_train.transpose()
    y_train = np.array(labels_train, dtype=int);
    X_test = signal_test.transpose()
    y_test = np.array(Y, dtype=int);
    
    return X_train, y_train, X_test, y_test


In [32]:
from time import time
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Benchmark metrics for each classifier
# Adapted from Peter Prettenhofer, et. al,
# https://scikit-learn.org/0.19/auto_examples/text/document_classification_20newsgroups.html
def benchmark(clf):
    print('_' * 80)
    print("Training: ")
   
    t0 = time()
    clf.fit(X_train, y_train)
 
    results=clf.cv_results_
    candidates = np.flatnonzero(results['rank_test_score'] == 1)
    for candidate in candidates:
        print("Model with rank: {0}".format(1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})"
              .format(results['mean_test_score'][candidate],
                      results['std_test_score'][candidate]))
        print("Parameters: {0}".format(results['params'][candidate]))
        print("\n")

    target_names=[]
    targets=np.unique(y_train);

    for i in range(len(targets)):
        target_names.append(np.str(targets[i]))
    
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    pred = clf.predict(X_test)
    test_time = time() - t0
    
    print("test time:  %0.3fs" % test_time)
    
    score = metrics.accuracy_score(y_test, pred)
    print("accuracy:   %0.3f" % score)
    print("classification report:")
    print(metrics.classification_report(y_test, pred, target_names=target_names))
    
    print("confusion matrix:")
    print(metrics.confusion_matrix(y_test, pred))

    clf_descr = str(str(clf.estimator))
    clf_rep = str(metrics.classification_report(y_test, pred, target_names=target_names))
    clf_cm = str(metrics.confusion_matrix(y_test, pred))
    return clf_descr, score, train_time, test_time, clf_rep, clf_cm
      

In [33]:
from scipy.stats import randint as sp_randint
from scipy.stats import uniform, expon
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis


# list of (estimator, param_dist), where param_dist is used  in RandomizedSearchCV
classifiers = [
     (GaussianProcessClassifier(warm_start=True, n_jobs=-1, random_state=42),{
        'max_iter_predict': [10, 20, 50, 100, 200],
        'multi_class': ["one_vs_rest", "one_vs_one"],
        'n_restarts_optimizer':[0, 1]
     }),
     (KNeighborsClassifier(), {
         'n_neighbors': sp_randint(4, 10),
         'weights': ['uniform', 'distance'],
         'algorithm':['ball_tree', 'kd_tree'],
         'p': [1,2]
     }),
    (NearestCentroid(), {
         'metric': ['euclidean', 'manhattan','minkowski','chebyshev'],
         'shrink_threshold': [None, .001, .0001, .1, .01]
     }),
    (SVC(kernel='linear'), {
        'C': np.logspace(-1, 3, 100),
        'tol': np.logspace(-8, -2, 100)
    }),
    (SVC(random_state=42, gamma='scale'), {
        'C': [.1, 1, 10, 100, 1000],
        'kernel': ['rbf', 'poly', 'linear', 'sigmoid'], 
        'class_weight':['balanced', None],
        'tol': np.logspace(-8, -2, 100)
    }),
    (MLPClassifier(max_iter=100000), {
        'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
        'alpha': expon(scale=.1),
        'learning_rate': ['constant','adaptive'],
    }),
    (DecisionTreeClassifier(),{
        'criterion': ['gini', 'entropy'],
        'splitter': ['best','random'],
        'max_features':[None, 'auto', 'sqrt', 'log2'],
    }),
    (RandomForestClassifier(max_depth=5, n_jobs=-1), {
        "max_depth": sp_randint(2, 20),
        "n_estimators": sp_randint(2, 50),
        "max_features": ['auto', 'log2'],
        'class_weight': ['balanced', 'balanced_subsample'],
        'criterion': ['gini', 'entropy']
    }),
     (GaussianNB(), {
         'var_smoothing': np.logspace(-12, -8, 100)
     }), 
     (LinearDiscriminantAnalysis(n_components=2), {
         'solver': ['svd', 'lsqr'],
         'tol': np.logspace(-10, -2, 100)
     }),
     (LogisticRegression(solver='newton-cg', random_state=0, max_iter=100000), {
        'C': [.1, 1, 10, 100, 1000],
         'multi_class': ["auto", "ovr","multinomial"],
         'solver': ['sag', 'saga', 'newton-cg'],
         'tol': np.logspace(-6, -4, 100)
     }),
]
names = [e.__class__.__name__ for e, g in classifiers]

In [34]:
from sklearn.model_selection import RandomizedSearchCV
clf = RandomizedSearchCV(estimator=LinearDiscriminantAnalysis(n_components=2), param_distributions={'solver': ['svd', 'lsqr']}, n_iter=20, cv=5,
                                 verbose=0, n_jobs=-1)
X_train, y_train, X_test, y_test= load_dataset(15)
benchmark(clf)

________________________________________________________________________________
Training: 




Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'solver': 'svd'}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'solver': 'lsqr'}


train time: 0.286s
test time:  0.003s
accuracy:   0.809
classification report:
              precision    recall  f1-score   support

           0       0.99      0.25      0.40     10000
           1       0.57      0.99      0.72     10000
           2       0.99      0.99      0.99     10000
           3       1.00      1.00      1.00     10000

    accuracy                           0.81     40000
   macro avg       0.89      0.81      0.78     40000
weighted avg       0.89      0.81      0.78     40000

confusion matrix:
[[ 2517  7457    25     1]
 [   13  9918    69     0]
 [    0    63  9937     0]
 [    0     0     0 10000]]


("LinearDiscriminantAnalysis(n_components=2, priors=None, shrinkage=None,\n                           solver='svd', store_covariance=False, tol=0.0001)",
 0.8093,
 0.28588080406188965,
 0.003426074981689453,
 '              precision    recall  f1-score   support\n\n           0       0.99      0.25      0.40     10000\n           1       0.57      0.99      0.72     10000\n           2       0.99      0.99      0.99     10000\n           3       1.00      1.00      1.00     10000\n\n    accuracy                           0.81     40000\n   macro avg       0.89      0.81      0.78     40000\nweighted avg       0.89      0.81      0.78     40000\n',
 '[[ 2517  7457    25     1]\n [   13  9918    69     0]\n [    0    63  9937     0]\n [    0     0     0 10000]]')

In [35]:
from sklearn.preprocessing import StandardScaler
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split


data_name=['KRAKEN 2 layer']
dset_name=data_name

noiselevel = np.array([15, 17 ,19, 21, 26, 30, 32 ,33, 60])
grid_searches = [dict() for x in range(len(noiselevel))]
results = [[] for x in range(len(noiselevel))]
ds_cnt=-1
for nl in noiselevel:
    ds_cnt=ds_cnt+1
    X_train, y_train, X_test, y_test= load_dataset(nl)
    
    print('\n Noise level:', str(nl),'\n')
   
    # iterate over classifiers
    for est_idx, (name, (estimator, param_grid)) in \
        enumerate(zip(names, classifiers)):

        # Perform randomized grid search over possible hyperparameters
        clf = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, n_iter=20, cv=5,
                                 verbose=0, n_jobs=-1)
        
        with ignore_warnings(category=ConvergenceWarning):
            results[ds_cnt].append(benchmark(clf))
        grid_searches[ds_cnt][name] = clf

        # Make predictions for the four test cases
        print(name)
        
        #print('Test Score', str(clf.score(X_test, y_test)))
      
        
    


 Noise level: 15 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'n_restarts_optimizer': 1, 'multi_

test time:  4.830s
accuracy:   0.817
classification report:
              precision    recall  f1-score   support

           0       1.00      0.28      0.44     10000
           1       0.58      0.99      0.73     10000
           2       0.99      0.99      0.99     10000
           3       1.00      1.00      1.00     10000

    accuracy                           0.82     40000
   macro avg       0.89      0.82      0.79     40000
weighted avg       0.89      0.82      0.79     40000

confusion matrix:
[[ 2817  7160    22     1]
 [   14  9945    41     0]
 [    0    90  9910     0]
 [    0     0     0 10000]]
SVC
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'tol': 1.8738174228603867e-05, 'kernel': 'rbf', 'class_weight': 'balanced', 'C': 0.1}


Model with rank: 1
Mean validation score: 0.997 (std: 0.001)
Parameters: {'tol': 6.579332246575682e-07, 'kernel': 'rbf',



Model with rank: 1
Mean validation score: 0.947 (std: 0.008)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.316s
test time:  0.006s
accuracy:   0.765
classification report:
              precision    recall  f1-score   support

           0       0.90      0.24      0.37     10000
           1       0.53      0.90      0.67     10000
           2       0.89      0.93      0.91     10000
           3       0.99      0.99      0.99     10000

    accuracy                           0.77     40000
   macro avg       0.83      0.77      0.74     40000
weighted avg       0.83      0.77      0.74     40000

confusion matrix:
[[2352 7158  435   55]
 [ 274 8999  725    2]
 [   1  655 9336    8]
 [   0   68   12 9920]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.991 (std: 0.004)
Parameters: {'class_weight': 'balanced', 'criterion': 'gi


 Noise level: 17 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.995 (std: 0.002)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.934 (std: 0.005)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.337s
test time:  0.006s
accuracy:   0.761
classification report:
              precision    recall  f1-score   support

           0       0.85      0.28      0.42     10000
           1       0.56      0.86      0.68     10000
           2       0.80      0.91      0.85     10000
           3       0.99      0.99      0.99     10000

    accuracy                           0.76     40000
   macro avg       0.80      0.76      0.74     40000
weighted avg       0.80      0.76      0.74     40000

confusion matrix:
[[2832 5801 1321   46]
 [ 476 8648  831   45]
 [  12  860 9082   46]
 [  19   33   65 9883]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.982 (std: 0.003)
Parameters: {'class_weight': 'balanced_subsample', 'crite

Model with rank: 1
Mean validation score: 0.987 (std: 0.003)
Parameters: {'tol': 5.2140082879996844e-05, 'solver': 'newton-cg', 'multi_class': 'multinomial', 'C': 1000}


Model with rank: 1
Mean validation score: 0.987 (std: 0.003)
Parameters: {'tol': 1.6681005372000591e-06, 'solver': 'newton-cg', 'multi_class': 'multinomial', 'C': 1000}


train time: 10.183s
test time:  0.002s
accuracy:   0.861
classification report:
              precision    recall  f1-score   support

           0       0.97      0.50      0.66     10000
           1       0.67      0.95      0.78     10000
           2       0.94      1.00      0.97     10000
           3       1.00      1.00      1.00     10000

    accuracy                           0.86     40000
   macro avg       0.89      0.86      0.85     40000
weighted avg       0.89      0.86      0.85     40000

confusion matrix:
[[ 4970  4717   304     9]
 [  168  9508   324     0]
 [    0    44  9956     0]
 [    0     0     0 10000]]
LogisticRegressi



Model with rank: 1
Mean validation score: 0.897 (std: 0.010)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.343s
test time:  0.007s
accuracy:   0.764
classification report:
              precision    recall  f1-score   support

           0       0.84      0.38      0.53     10000
           1       0.57      0.80      0.67     10000
           2       0.79      0.89      0.84     10000
           3       0.98      0.97      0.98     10000

    accuracy                           0.76     40000
   macro avg       0.79      0.76      0.75     40000
weighted avg       0.79      0.76      0.75     40000

confusion matrix:
[[3845 4953 1113   89]
 [ 704 8048 1199   49]
 [  43  951 8940   66]
 [   9  231   45 9715]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.973 (std: 0.004)
Parameters: {'class_weight': 'balanced', 'criterion': 'gi


 Noise level: 21 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.981 (std: 0.003)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.869 (std: 0.003)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.370s
test time:  0.007s
accuracy:   0.744
classification report:
              precision    recall  f1-score   support

           0       0.79      0.36      0.50     10000
           1       0.54      0.78      0.64     10000
           2       0.79      0.86      0.82     10000
           3       0.97      0.97      0.97     10000

    accuracy                           0.74     40000
   macro avg       0.77      0.74      0.73     40000
weighted avg       0.77      0.74      0.73     40000

confusion matrix:
[[3619 5186 1051  144]
 [ 908 7848 1197   47]
 [  61 1247 8626   66]
 [  16  225   92 9667]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.962 (std: 0.006)
Parameters: {'class_weight': 'balanced', 'criterion': 'en

Model with rank: 1
Mean validation score: 0.972 (std: 0.006)
Parameters: {'tol': 1.023531021899027e-05, 'solver': 'newton-cg', 'multi_class': 'multinomial', 'C': 1000}


Model with rank: 1
Mean validation score: 0.972 (std: 0.006)
Parameters: {'tol': 2.310129700083158e-06, 'solver': 'saga', 'multi_class': 'auto', 'C': 1000}


train time: 12.587s
test time:  0.002s
accuracy:   0.842
classification report:
              precision    recall  f1-score   support

           0       0.92      0.48      0.63     10000
           1       0.66      0.90      0.76     10000
           2       0.90      0.99      0.94     10000
           3       1.00      1.00      1.00     10000

    accuracy                           0.84     40000
   macro avg       0.87      0.84      0.83     40000
weighted avg       0.87      0.84      0.83     40000

confusion matrix:
[[ 4785  4622   561    32]
 [  413  9023   563     1]
 [    4   114  9882     0]
 [    0     0     0 10000]]
LogisticRegression

 Noise lev

Model with rank: 1
Mean validation score: 0.728 (std: 0.243)
Parameters: {'alpha': 0.029914761877627906, 'hidden_layer_sizes': (50, 100, 50), 'learning_rate': 'adaptive'}


train time: 214.928s
test time:  0.074s
accuracy:   0.712
classification report:
              precision    recall  f1-score   support

           0       0.64      0.44      0.52     10000
           1       0.50      0.66      0.57     10000
           2       0.79      0.83      0.81     10000
           3       0.98      0.91      0.95     10000

    accuracy                           0.71     40000
   macro avg       0.73      0.71      0.71     40000
weighted avg       0.73      0.71      0.71     40000

confusion matrix:
[[4438 5027  408  127]
 [1651 6581 1755   13]
 [  14 1643 8343    0]
 [ 873    2    0 9125]]
MLPClassifier
________________________________________________________________________________
Training: 




Model with rank: 1
Mean validation score: 0.800 (std: 0.017)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.393s
test time:  0.007s
accuracy:   0.692
classification report:
              precision    recall  f1-score   support

           0       0.67      0.35      0.46     10000
           1       0.50      0.66      0.57     10000
           2       0.71      0.81      0.76     10000
           3       0.92      0.95      0.94     10000

    accuracy                           0.69     40000
   macro avg       0.70      0.69      0.68     40000
weighted avg       0.70      0.69      0.68     40000

confusion matrix:
[[3484 4763 1389  364]
 [1487 6602 1745  166]
 [ 192 1435 8114  259]
 [  36  289  176 9499]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.922 (std: 0.007)
Parameters: {'class_weight': 'balanced_subsample', 'crite


 Noise level: 30 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.934 (std: 0.006)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.753 (std: 0.009)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.416s
test time:  0.007s
accuracy:   0.653
classification report:
              precision    recall  f1-score   support

           0       0.63      0.36      0.45     10000
           1       0.47      0.60      0.53     10000
           2       0.65      0.75      0.70     10000
           3       0.90      0.91      0.91     10000

    accuracy                           0.65     40000
   macro avg       0.66      0.65      0.65     40000
weighted avg       0.66      0.65      0.65     40000

confusion matrix:
[[3558 4521 1536  385]
 [1650 6008 2091  251]
 [ 343 1869 7469  319]
 [ 113  468  323 9096]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.888 (std: 0.006)
Parameters: {'class_weight': 'balanced', 'criterion': 'gi


 Noise level: 32 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.929 (std: 0.008)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.731 (std: 0.008)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.420s
test time:  0.007s
accuracy:   0.666
classification report:
              precision    recall  f1-score   support

           0       0.67      0.37      0.48     10000
           1       0.50      0.64      0.56     10000
           2       0.66      0.75      0.70     10000
           3       0.87      0.91      0.89     10000

    accuracy                           0.67     40000
   macro avg       0.68      0.67      0.66     40000
weighted avg       0.68      0.67      0.66     40000

confusion matrix:
[[3688 4122 1574  616]
 [1376 6376 1909  339]
 [ 332 1799 7521  348]
 [  84  450  407 9059]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.883 (std: 0.009)
Parameters: {'class_weight': 'balanced_subsample', 'crite


 Noise level: 33 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.912 (std: 0.010)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.724 (std: 0.012)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.433s
test time:  0.007s
accuracy:   0.631
classification report:
              precision    recall  f1-score   support

           0       0.59      0.36      0.45     10000
           1       0.45      0.56      0.50     10000
           2       0.64      0.71      0.67     10000
           3       0.87      0.89      0.88     10000

    accuracy                           0.63     40000
   macro avg       0.64      0.63      0.63     40000
weighted avg       0.64      0.63      0.63     40000

confusion matrix:
[[3641 4233 1549  577]
 [1907 5642 2072  379]
 [ 477 2058 7114  351]
 [ 123  554  462 8861]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.859 (std: 0.005)
Parameters: {'class_weight': 'balanced_subsample', 'crite


 Noise level: 60 

________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 10}


Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 1, 'multi_class': 'one_vs_one', 'max_iter_predict': 20}


Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 0, 'multi_class': 'one_vs_one', 'max_iter_predict': 50}


Model with rank: 1
Mean validation score: 0.727 (std: 0.017)
Parameters: {'n_restarts_optimizer': 1, 'multi_



Model with rank: 1
Mean validation score: 0.488 (std: 0.021)
Parameters: {'splitter': 'best', 'max_features': None, 'criterion': 'entropy'}


train time: 0.503s
test time:  0.008s
accuracy:   0.448
classification report:
              precision    recall  f1-score   support

           0       0.39      0.32      0.35     10000
           1       0.33      0.34      0.33     10000
           2       0.43      0.46      0.45     10000
           3       0.63      0.67      0.65     10000

    accuracy                           0.45     40000
   macro avg       0.44      0.45      0.45     40000
weighted avg       0.44      0.45      0.45     40000

confusion matrix:
[[3222 3086 2254 1438]
 [2666 3391 2722 1221]
 [1579 2488 4641 1292]
 [ 894 1309 1124 6673]]
DecisionTreeClassifier
________________________________________________________________________________
Training: 
Model with rank: 1
Mean validation score: 0.642 (std: 0.007)
Parameters: {'class_weight': 'balanced', 'criterion': 'en

In [36]:
import datetime
# Displaying results to paste into latex
for nc in range(9):
    print("\\begin{filecontents}{"+names[nc]+"-kraken.dat}\n noise 	snr accuracy") 
    for nl in range(len(noiselevel)):
        clf_descr, score, train_time, test_time, clf_rep, clf_cm=results[nl][nc]
    
        print(str(noiselevel[nl])+" "+ str(18-nl)+" "+ str(score*100))
    print("\end{filecontents}")
    

\begin{filecontents}{GaussianProcessClassifier-kraken.dat}
 noise 	snr accuracy
15 18 81.065
17 17 81.1675
19 16 81.0975
21 15 80.79499999999999
26 14 79.55250000000001
30 13 78.14750000000001
32 12 78.05
33 11 76.92999999999999
60 10 63.465
\end{filecontents}
\begin{filecontents}{KNeighborsClassifier-kraken.dat}
 noise 	snr accuracy
15 18 81.62
17 17 82.6875
19 16 82.515
21 15 81.15
26 14 79.4625
30 13 76.47
32 12 78.4725
33 11 74.8375
60 10 55.87
\end{filecontents}
\begin{filecontents}{NearestCentroid-kraken.dat}
 noise 	snr accuracy
15 18 81.065
17 17 81.195
19 16 81.10000000000001
21 15 80.8175
26 14 79.55250000000001
30 13 78.14750000000001
32 12 78.0475
33 11 76.92750000000001
60 10 63.465
\end{filecontents}
\begin{filecontents}{SVC-kraken.dat}
 noise 	snr accuracy
15 18 81.67999999999999
17 17 81.41000000000001
19 16 80.9
21 15 80.63749999999999
26 14 79.7975
30 13 78.4225
32 12 78.2475
33 11 76.5925
60 10 63.455
\end{filecontents}
\begin{filecontents}{SVC-kraken.dat}
 noise 	sn

In [None]:
# Displaying results in a pandas dataframe
#Code adapted from https://www.kaggle.com/grfiv4/displaying-the-results-of-a-grid-search
import pandas as pd
def score_summary(grid_searches, sort_by='mean_test_score'):
        frames = []
        for name, grid_search in grid_searches.items():
            frame = pd.DataFrame(grid_search.cv_results_)
            frame = frame.filter(regex='^(?!.*param_).*$')
            frame['estimator'] = len(frame)*[name]
            frames.append(frame)
        df = pd.concat(frames)
        
        df = df.sort_values([sort_by], ascending=False)
        df = df.reset_index()
        df = df.drop(['rank_test_score', 'index'], 1)
        
        columns = df.columns.tolist()
        columns.remove('estimator')
        columns = ['estimator']+columns
        df = df[columns]
        return df

In [None]:
df=score_summary(grid_searches[0])
print(noiselevel[0])
df