In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification, load_iris
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import h5py
import scipy

In [3]:
def load_dataset(filename, summary=False):
    """Load one MATLAB file and build train/test feature matrices with labels.

    The file must contain the four class arrays ``p_cl_n``, ``p_si_n``,
    ``p_sa_n`` and ``p_gr_n`` (used as the training set) plus ten further
    sets with the same names suffixed ``1`` .. ``10`` (used as test sets).
    Each array is read as complex-valued with one column per sample; the
    real and imaginary parts are placed side by side as features.

    Parameters
    ----------
    filename : str
        Path of the ``.mat`` file to read.
    summary : bool, default False
        If True, every sample is reduced to two features: the mean and the
        standard deviation over its real/imaginary feature vector.

    Returns
    -------
    X : ndarray, shape (n_samples, n_features)
        Training features.
    y : ndarray, shape (n_samples,)
        Integer class labels 0..3, in the order cl, si, sa, gr.
    all_testing : ndarray
        The ten test sets stacked row-wise, in order 1..10.
    all_labels : ndarray
        Labels matching the rows of ``all_testing``.

    Raises
    ------
    KeyError
        If one of the expected variables is missing from the file.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.io`` is loaded on every SciPy version.
    from scipy.io import loadmat

    mat = loadmat(filename)

    class_keys = ('p_cl_n', 'p_si_n', 'p_sa_n', 'p_gr_n')
    test_ids = range(1, 11)

    def build_set(suffix=''):
        # One (features, labels) pair for the variables ending in `suffix`.
        per_class = [mat[key + suffix] for key in class_keys]
        merged = np.concatenate(per_class, axis=1)
        # Rows are samples; columns are all real parts, then all imaginary parts.
        features = np.concatenate(
            [np.real(merged).transpose(), np.imag(merged).transpose()], axis=1
        )
        # Label counts follow the actual number of columns per class instead
        # of assuming 1000 samples in each of the four classes.
        counts = [block.shape[1] for block in per_class]
        labels = np.repeat(np.arange(len(per_class)), counts)
        return features, labels

    def summarise(features):
        # Collapse each sample to [mean, std] over its feature vector.
        return np.column_stack((features.mean(axis=1), features.std(axis=1)))

    X, y = build_set()
    test_sets = [build_set(str(i)) for i in test_ids]

    if summary:
        X = summarise(X)
        test_sets = [(summarise(feats), labs) for feats, labs in test_sets]

    all_testing = np.concatenate([feats for feats, _ in test_sets])
    all_labels = np.concatenate([labs for _, labs in test_sets])
    return X, y, all_testing, all_labels


In [4]:
# Paths of the nine .mat files used in this notebook, in processing order.
filenames = [
    "./files_116/files_VLA_rand_15_116.mat",
    "./files_116/files_VLA_rand_17_116.mat",
    "./files_116/files_VLA_rand_19_116.mat",
    "./files_116/files_VLA_rand_21_116.mat",
    "./files_116/files_VLA_rand_26_116.mat",
    "./files_116/files_VLA_rand_30_116.mat",
    "./files_116/files_VLA_rand_32_116.mat",
    "./files_116/files_VLA_rand_33_116.mat",
    "./files_116/files_VLA_rand_60_116.mat",
]
# Individual aliases, one per entry of `filenames`.
f1, f2, f3, f4, f5, f6, f7, f8, f9 = filenames

In [5]:
# Per-file results of load_dataset, keyed by the file's position in `filenames`.
X_train, y_train, X_test, y_test = {}, {}, {}, {}
for i, path in enumerate(filenames):
    # Second argument False: keep the full feature vectors (no mean/std summary).
    loaded = load_dataset(path, False)
    X_train[i], y_train[i], X_test[i], y_test[i] = loaded
    

In [6]:

# Fixed seed for every estimator that draws random numbers, so that a
# Restart & Run All reproduces the same confusion matrices.
RANDOM_STATE = 0

# Display name and estimator are declared together so the two derived lists
# below cannot drift out of alignment when an entry is added or removed.
# AdaBoost, Gaussian Naive Bayes and QDA are imported above but are not part
# of this comparison.
_named_classifiers = [
    ("LDA", LinearDiscriminantAnalysis()),
    ("Nearest Neighbors", KNeighborsClassifier(5)),
    ("Nearest Centroid", NearestCentroid(metric='euclidean')),
    # No gamma here: it is a coefficient for the rbf/poly/sigmoid kernels only.
    ("Linear SVM", SVC(kernel="linear")),
    ("RBF SVM", SVC(gamma='scale')),
    ("Decision Tree", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("Random Forest", RandomForestClassifier(random_state=RANDOM_STATE)),
    ("Gaussian Process", GaussianProcessClassifier(random_state=RANDOM_STATE)),
    ("Neural Net", MLPClassifier(random_state=RANDOM_STATE)),
]

names = [label for label, _ in _named_classifiers]
classifiers = [estimator for _, estimator in _named_classifiers]


In [None]:
from sklearn.metrics import confusion_matrix

# Index (into `filenames`) of the file whose training split fits the models,
# and of the file whose test splits are used for the confusion matrix.
TRAIN_FILE = 0
EVAL_FILE = 1

score = []
for name, clf in zip(names, classifiers):
    clf.fit(X_train[TRAIN_FILE], y_train[TRAIN_FILE])
    print(name)

    # Rows of the matrix are true classes, columns are predicted classes.
    predicted = clf.predict(X_test[EVAL_FILE])
    conf = confusion_matrix(y_test[EVAL_FILE], predicted)
    print(conf)
    

LDA
[[ 2656  7283    60     1]
 [   33  9851   116     0]
 [    0   102  9898     0]
 [    0     0     0 10000]]
Nearest Neighbors
[[ 3019  6877   103     1]
 [   60  9809   131     0]
 [    1   125  9874     0]
 [    0     0     0 10000]]
Nearest Centroid
[[ 2717  7210    72     1]
 [   31  9864   105     0]
 [    0   113  9887     0]
 [    0     0     0 10000]]
Linear SVM
[[ 2934  7008    57     1]
 [   38  9876    86     0]
 [    0   128  9872     0]
 [    0     0     0 10000]]
RBF SVM
[[ 3238  6681    80     1]
 [   38  9839   123     0]
 [    0   103  9897     0]
 [    0     0     0 10000]]
Decision Tree
[[3362 5910  697   31]
 [ 629 8470  884   17]
 [  47  997 8931   25]
 [  17  184    4 9795]]
Random Forest
[[2977 6890  124    9]
 [  97 9711  192    0]
 [   0  228 9772    0]
 [   0    3    0 9997]]
Gaussian Process
