In [None]:
import numpy as np
from StatisticalAnalysis import compare_methods

In [None]:
import warnings
# Install a blanket "ignore" filter: every warning raised from here on is suppressed.
warnings.simplefilter(action='ignore')

First, we load the data and reshape it so that every sample becomes a single flat row of features.

In [None]:
# Time-series arrays for the two groups, read from the working directory
# (names suggest healthy controls vs. ADHD — TODO confirm with the data source).
ts_hc = np.load(file='ts_hc.npy')
ts_adhd = np.load(file='ts_adhd.npy')

In [None]:
# One row per sample (52 of them), each holding 142 * 118 flattened values
# (presumably 142 time points x 118 regions — TODO confirm).
ts_hc = np.reshape(ts_hc, (52, 142 * 118))

In [None]:
# One row per sample (40 of them), each holding 142 * 118 flattened values
# (presumably 142 time points x 118 regions — TODO confirm).
ts_adhd = np.reshape(ts_adhd, (40, 142 * 118))

In [None]:
# Correlation arrays for the two groups, read from the working directory.
corr_hc = np.load(file='corr_hc.npy')
corr_adhd = np.load(file='corr_adhd.npy')

In [None]:
# Flatten each 118 * 118 block of correlation values into a single row per sample.
corr_hc = np.reshape(corr_hc, (52, 118 * 118))
corr_adhd = np.reshape(corr_adhd, (40, 118 * 118))

In [None]:
# Per group, place the flattened time series and the flattened correlations
# side by side so each row carries both feature sets.
hc = np.concatenate((ts_hc, corr_hc), axis=1)
adhd = np.concatenate((ts_adhd, corr_adhd), axis=1)

In [None]:
# Stack the two groups row-wise: all `hc` rows first, then all `adhd` rows.
X = np.concatenate((hc, adhd), axis=0)

In [None]:
# Sanity check: (rows, columns) of the combined feature matrix.
X.shape

In [None]:
# Sanity check: number of features in a single row.
X[0].shape

In [None]:
# Class labels in the same row order as X: 0 for every row of `hc`, 1 for every row of `adhd`.
# The counts are read from the arrays themselves. The earlier hard-coded np.ones(50)
# disagreed with the 40 rows the ADHD arrays are reshaped to, which left Y longer than X.
Y = np.concatenate((np.zeros(len(hc)), np.ones(len(adhd))))

In [None]:
# Sanity check: number of labels (should equal the number of rows of X).
Y.shape

Next, we define the classifiers to compare, together with the hyper-parameter search space for each one.

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from scipy.stats import randint as sp_randint

In [None]:
from scipy.stats import uniform as sp_uniform

# Every estimator is paired with a search space: lists are discrete choices and
# scipy.stats objects are distributions to sample from (presumably consumed by a
# randomized hyper-parameter search inside compare_methods — TODO confirm).
# All estimators that accept a seed use random_state=84.


def _forest_search_space():
    """Return a fresh copy of the search space shared by the two tree-ensemble classifiers."""
    return {
        'n_estimators': list(range(200, 2001, 200)),  # 200, 400, ..., 2000
        # 'auto' is only an alias of 'sqrt' for forest classifiers and is rejected by
        # recent scikit-learn releases, so 'log2' is offered as the real alternative.
        'max_features': ['sqrt', 'log2'],
        'max_depth': list(range(10, 111, 10)),  # 10, 20, ..., 110
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'bootstrap': [True, False],
    }


# Decision tree
clfTree = DecisionTreeClassifier(random_state=84)
param_distTree = {"min_samples_split": sp_randint(3, 30)}  # integers 3..29

# SVM
clfSVC = SVC(random_state=84)
param_distSVC = {
    'C': [1, 10, 100, 1000],
    'gamma': [0.001, 0.0001],
    'kernel': ['rbf'],
    'class_weight': ['balanced', None],
}

# KNN
clfKNN = KNeighborsClassifier()
param_distKNN = {'n_neighbors': sp_randint(3, 30)}  # integers 3..29

# Logistic regression
clfLR = LogisticRegression(random_state=84)
param_distLR = {'C': [0.1, 0.5, 1, 10, 100, 1000]}

# Neural Network
clfMLP = MLPClassifier(random_state=84)
param_distMLP = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    # alpha is a continuous penalty, so it is drawn uniformly from [0.0001, 1].
    # The previous sp_randint(0.0001, 1) is an integer distribution and cannot
    # produce values in that range.
    'alpha': sp_uniform(loc=0.0001, scale=1 - 0.0001),
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'hidden_layer_sizes': [(5, 2), (3, 3, 3), (5, 3, 2), (5, 4, 3, 2)],
    'momentum': [0.9, 0.95, 0.99],
}

# RandomForest
clfRF = RandomForestClassifier(random_state=84)
param_distRF = _forest_search_space()

# ExtraTreesClassifier
clfET = ExtraTreesClassifier(random_state=84)
param_distET = _forest_search_space()

In [None]:
# Estimators to benchmark, in the same order as listParams and listNames.
listAlgorithms = [
    clfTree,
    clfSVC,
    clfKNN,
    clfLR,
    clfMLP,
    clfRF,
    clfET,
]

In [None]:
# Search space for each estimator, in the same order as listAlgorithms.
listParams = [
    param_distTree,
    param_distSVC,
    param_distKNN,
    param_distLR,
    param_distMLP,
    param_distRF,
    param_distET,
]

In [None]:
# Display label for each estimator, in the same order as listAlgorithms
# ("Arbol" is Spanish for "tree").
listNames = [
    "Arbol",
    "SVM",
    "KNN",
    "LR",
    "MLP",
    "RF",
    "ET",
]

# Plain comparison

In [None]:
# Baseline: compare every classifier on the raw, un-reduced features,
# once per metric (one cell each so the outputs stay separate).
compare_methods(X,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
# Same comparison, scored with metric='precision'.
compare_methods(X,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
# Same comparison, scored with metric='recall'.
compare_methods(X,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
# Same comparison, scored with metric='accuracy'.
compare_methods(X,Y,listAlgorithms,listParams,listNames,metric='accuracy')

# PCA with 2 components

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Reduce X to its first two principal components.
# NOTE(review): the projection is fitted on all rows of X before compare_methods runs;
# if compare_methods cross-validates internally, the held-out folds have already
# influenced the projection — confirm whether that is acceptable.
pca = PCA(n_components=2)

In [None]:
# Learn the components from the full feature matrix.
pca.fit(X)

In [None]:
# Project every row of X onto the fitted components.
X_pca2 = pca.transform(X)

In [None]:
# Sanity check: shape of the reduced matrix.
X_pca2.shape

In [None]:
# Compare every classifier on the 2-component PCA features, one metric per cell.
compare_methods(X_pca2,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_pca2,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_pca2,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_pca2,Y,listAlgorithms,listParams,listNames,metric='accuracy')

# PCA with 10 components

In [None]:
# Reduce X to its first ten principal components (rebinds `pca`).
pca = PCA(n_components=10)

In [None]:
# Learn the components from the full feature matrix.
pca.fit(X)

In [None]:
# Project every row of X onto the fitted components.
X_pca10 = pca.transform(X)

In [None]:
# Rename temp.csv before the 10-component runs start.
# NOTE(review): temp.csv is presumably written by compare_methods, so at this point it
# would hold the 2-component PCA results (and possibly the plain ones) — confirm.
!mv temp.csv temppca2.csv

In [None]:
# Compare every classifier on the 10-component PCA features, one metric per cell.
compare_methods(X_pca10,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_pca10,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_pca10,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_pca10,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
# Keep the results file for this section under its own name.
!mv temp.csv temppca10.csv

# PCA with 100 components

In [None]:
pca = PCA(n_components=100)

In [None]:
pca.fit(X)

In [None]:
X_pca100 = pca.transform(X)

In [None]:
compare_methods(X_pca100,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_pca100,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_pca100,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_pca100,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
!mv temp.csv temppca100.csv

# TSNE with 2 components

In [None]:
from sklearn.manifold import TSNE

In [None]:
Xtsne2 = TSNE(n_components=2).fit_transform(X)

In [None]:
compare_methods(Xtsne2,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(Xtsne2,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(Xtsne2,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(Xtsne2,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
!mv temp.csv temptsne2.csv

# TSNE with 3 components

In [None]:
Xtsne3 = TSNE(n_components=3).fit_transform(X)
compare_methods(Xtsne3,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(Xtsne3,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(Xtsne3,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(Xtsne3,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
!mv temp.csv temptsne3.csv

# UMAP with 2 components

In [None]:
from umap import UMAP

In [None]:
reducer = UMAP()

In [None]:
Xumap = reducer.fit_transform(X)

In [None]:
compare_methods(Xumap,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(Xumap,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(Xumap,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(Xumap,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
!mv temp.csv tempumap2.csv

# UMAP with 10 components

In [None]:
reducer = UMAP(n_components=10)

In [None]:
Xumap10 = reducer.fit_transform(X)

In [None]:
compare_methods(Xumap10,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(Xumap10,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(Xumap10,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(Xumap10,Y,listAlgorithms,listParams,listNames,metric='accuracy')

# Isomap

In [None]:
from sklearn import manifold
clf = manifold.Isomap(10, n_components=2)
X_iso2 = clf.fit_transform(X)
compare_methods(X_iso2,Y,listAlgorithms,listParams,listNames,metric='auroc')
!mv temp.csv tempiso2.csv

In [None]:
compare_methods(X_iso2,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_iso2,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_iso2,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
clf = manifold.Isomap(10, n_components=10)
X_iso10 = clf.fit_transform(X)
compare_methods(X_iso10,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_iso10,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_iso10,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_iso10,Y,listAlgorithms,listParams,listNames,metric='accuracy')

# LLE

In [None]:
clf = manifold.LocallyLinearEmbedding(10, n_components=2,method='standard')
X_lle2 = clf.fit_transform(X)
compare_methods(X_lle2,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_lle2,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_lle2,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_lle2,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
clf = manifold.LocallyLinearEmbedding(10, n_components=10,method='standard')
X_lle10 = clf.fit_transform(X)
compare_methods(X_lle10,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_lle10,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_lle10,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_lle10,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
clf = manifold.LocallyLinearEmbedding(10, n_components=100,method='standard')
X_lle100 = clf.fit_transform(X)
compare_methods(X_lle100,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_lle100,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_lle100,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_lle100,Y,listAlgorithms,listParams,listNames,metric='accuracy')

# MDS

In [None]:
clf = manifold.MDS(n_components=2, n_init=1, max_iter=100)
X_mds2 = clf.fit_transform(X)
compare_methods(X_mds2,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_mds2,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_mds2,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_mds2,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
clf = manifold.MDS(n_components=10, n_init=1, max_iter=100)
X_mds10 = clf.fit_transform(X)
compare_methods(X_mds10,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_mds10,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_mds10,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_mds10,Y,listAlgorithms,listParams,listNames,metric='accuracy')

In [None]:
clf = manifold.MDS(n_components=100, n_init=1, max_iter=100)
X_mds100 = clf.fit_transform(X)
compare_methods(X_mds100,Y,listAlgorithms,listParams,listNames,metric='auroc')

In [None]:
compare_methods(X_mds100,Y,listAlgorithms,listParams,listNames,metric='precision')

In [None]:
compare_methods(X_mds100,Y,listAlgorithms,listParams,listNames,metric='recall')

In [None]:
compare_methods(X_mds100,Y,listAlgorithms,listParams,listNames,metric='accuracy')