In [8]:
import scipy
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
import pandas as pd
import numpy as np
import os
import scipy.io

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, roc_curve,precision_recall_curve, auc,confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC

from catboost import CatBoostClassifier

from xgboost import XGBClassifier

import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Input, LSTM, RepeatVector
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.optimizers import SGD, RMSprop, Adam
from keras import objectives
from keras import layers

In [9]:
import config as cf

In [10]:
# Label-file list, read from the project config module.
list_file_patient_label = cf.list_file_patient_label

# Feature-file lists, keyed by the name of the config attribute they come from.
dict_patient_files = {
    attribute: getattr(cf, attribute)
    for attribute in (
        "list_file_patient_data_5Bands",
        "list_file_patient_data_2hz",
        "list_file_patient_data_EOG_features",
        "list_file_patient_data_eeg_forehead_2hz",
        "list_file_patient_data_eeg_forehead_5bands",
    )
}

In [11]:
def process_patient_feature(file, eog):
    """Load one .mat feature file and return a 2-D (samples, features) matrix.

    Every variable stored in the file (keys containing "__" are skipped, which
    drops the loadmat metadata entries) is treated as one feature
    transformation; the per-transformation matrices are concatenated along
    the last axis.

    Parameters
    ----------
    file : str
        Path passed to ``scipy.io.loadmat``.
    eog : bool
        If true, each variable is used as stored. Otherwise each variable is
        unpacked as a 3-D array ``(channels, samples, frequency_bands)`` and
        flattened to ``(samples, channels * frequency_bands)``.

    Returns
    -------
    numpy.ndarray
        Concatenation of all per-transformation matrices along the last axis.

    Raises
    ------
    ValueError
        If ``eog`` is false and a variable is not 3-D, or if the file holds
        no usable variable (``np.concatenate`` of an empty list).

    Notes
    -----
    The sample axis is moved to the front *before* reshaping. Reshaping the
    ``(channels, samples, bands)`` array directly would fill each output row
    with consecutive samples of a single channel instead of all channels of
    a single sample.
    NOTE(review): models fitted on features produced by the previous direct
    reshape will see a different column layout — confirm whether they need
    to be retrained.
    """
    data = scipy.io.loadmat(file)
    transformations_performed = [key for key in data.keys() if "__" not in key]

    all_feature_ = []
    for transformation in transformations_performed:
        feature_data = data[transformation]
        if not eog:
            channels, sample_number, frequency_bands = feature_data.shape
            # (channels, samples, bands) -> (samples, channels, bands) so that
            # each row of the reshaped matrix belongs to exactly one sample.
            feature_data = feature_data.transpose(1, 0, 2).reshape(
                sample_number, channels * frequency_bands
            )
        all_feature_.append(feature_data)

    return np.concatenate(all_feature_, axis=-1)

In [12]:
def create_labels(labels):
    """Map each score to a class id and return the result as a copy.

    Scores below 0.35 become 0, scores from 0.35 up to (but excluding) 0.70
    become 1, and everything else becomes 2. The input is not modified;
    the returned container is ``labels.copy()`` with its entries overwritten.
    """
    classes = labels.copy()
    for position, score in enumerate(classes):
        if score < 0.35:
            level = 0
        elif score < 0.70:
            level = 1
        else:
            level = 2
        classes[position] = level
    return classes

In [13]:
def min_max_scale(s):
    """Rescale ``s`` linearly so its minimum maps to 0 and its maximum to 1.

    The bounds come from ``s.min()`` / ``s.max()`` called without arguments,
    so for a NumPy array they are taken over all elements at once (not per
    column).

    A constant input (maximum equal to minimum) returns zeros instead of the
    NaNs produced by dividing 0 by 0.

    Parameters
    ----------
    s : array-like exposing ``min()`` and ``max()``
        Values to rescale.

    Returns
    -------
    Same kind of object as ``s - s.min()``, with floating-point values.
    """
    lowest = s.min()
    value_range = s.max() - lowest
    # Only guard the scalar case; a non-scalar range (e.g. per-column bounds
    # from a DataFrame) keeps the plain element-wise division.
    if np.ndim(value_range) == 0 and value_range == 0:
        return (s - lowest) * 0.0
    return (s - lowest) / value_range

In [14]:
def pre_pro_features(lst_patient, eog):
    """Build one feature matrix per file and stack them into a single array.

    Parameters
    ----------
    lst_patient : iterable of str
        Paths handed one by one to ``process_patient_feature``.
    eog : bool
        Forwarded unchanged to ``process_patient_feature``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the per-file matrices, in the order of ``lst_patient``.

    Notes
    -----
    Each file is processed exactly once; the earlier ``if eog:`` branch
    computed the same array a second time and discarded the first result.
    """
    return np.array([process_patient_feature(file, eog=eog) for file in lst_patient])

In [15]:
# EOG features: process the file lists whose key contains "EOG", keep the
# first resulting array, then merge its first two axes into one row axis.
keys = dict_patient_files.keys()
features_eog = np.array(
    [pre_pro_features(dict_patient_files[name], eog=True) for name in keys if "EOG" in name]
)[0]
features_eog = np.reshape(
    features_eog,
    (features_eog.shape[0] * features_eog.shape[1], features_eog.shape[2]),
)
features_eog.shape


(20355, 108)

In [16]:
# EEG features: every file list whose key contains "5bands" is processed and
# the results are joined along the feature axis.
# NOTE(review): the match is case-sensitive, so the "..._5Bands" entry of
# dict_patient_files is not selected — confirm this is intended.
features_eeg = np.concatenate(
    [pre_pro_features(dict_patient_files[name], eog=False) for name in keys if "5bands" in name],
    axis=-1,
)
features_eeg = np.reshape(
    features_eeg,
    (features_eeg.shape[0] * features_eeg.shape[1], features_eeg.shape[2]),
)
print(features_eeg.shape)

# Full dataset: EEG columns followed by EOG columns.
complete_dataset = np.concatenate((features_eeg, features_eog), axis=-1)
print(complete_dataset.shape)

(20355, 80)
(20355, 188)


In [17]:
# Load the 'perclos' variable from every label file and flatten the first two
# axes into one vector.
threshold = 0.7
all_patient_labels = np.array(
    [scipy.io.loadmat(label_file)['perclos'] for label_file in list_file_patient_label]
)
number_patient, samples, _ = all_patient_labels.shape
labels = np.reshape(all_patient_labels, number_patient * samples)

# Binary target: 1 where the value is below `threshold`, 0 otherwise.
labels = np.less(labels, threshold).astype(int)

# Three-class alternative, currently disabled:
# labels = create_labels(labels)

# Rescale the feature matrix with min_max_scale.
complete_dataset = min_max_scale(complete_dataset)

In [18]:
# Negated number of zero-valued labels (non-zero count minus total count).
-(len(labels) - np.count_nonzero(labels))

-4049

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.decomposition import PCA
# Presumably a random seed for reproducibility.
# NOTE(review): no cell visible in this notebook reads `seed`
# (feature_importances hard-codes random_state=0) — confirm it is still needed.
seed = 123

In [20]:
def feature_importances(complete_dataset, labels):
    """Fit an extra-trees classifier and report how important each feature is.

    A forest of 250 trees (``random_state=0``) is fitted on the given data
    and the ranking is printed, most important feature first.

    Returns
    -------
    indices : numpy.ndarray
        Feature indices sorted by decreasing importance.
    importances : numpy.ndarray
        Forest-level importances, in original feature order (not ranked).
    std : numpy.ndarray
        Standard deviation of the per-tree importances, in original feature
        order (not ranked).
    """
    model = ExtraTreesClassifier(random_state=0, n_estimators=250)
    model.fit(complete_dataset, labels)

    importances = model.feature_importances_
    per_tree = [estimator.feature_importances_ for estimator in model.estimators_]
    std = np.std(per_tree, axis=0)

    # argsort is ascending; reverse it to get the most important feature first.
    indices = np.argsort(importances)[::-1]

    print("Feature ranking:")
    feature_count = complete_dataset.shape[1]
    for position in range(feature_count):
        feature = indices[position]
        print("%d. feature %d (%f)" % (position + 1, feature, importances[feature]))

    return indices, importances, std

In [21]:
def run_exp_for_feature_importance(name, complete_dataset, labels, z_mean=False,
                                   z_logsigma=False, latent_dims=(8, 16, 32)):
    """Rank the encoded features of several saved encoders.

    For every size in ``latent_dims`` the model stored in
    ``'<name>_<size>.h5'`` is loaded, ``complete_dataset`` is encoded with
    it, and ``feature_importances`` is run on the encoded data.

    Parameters
    ----------
    name : str
        Prefix of the saved model files. If it contains "lstm", a trailing
        axis is added to the input before prediction. If it contains
        "ffvae", ``z_mean`` / ``z_logsigma`` select which model output is used.
    complete_dataset : numpy.ndarray
        2-D input matrix fed to the encoder.
    labels : array-like
        Targets passed to ``feature_importances``.
    z_mean : bool, optional
        For "ffvae" models, use output 0 of the prediction. Takes precedence
        over ``z_logsigma``.
    z_logsigma : bool, optional
        For "ffvae" models, use output 1 of the prediction.
    latent_dims : iterable of int, optional
        Size suffixes of the model files to evaluate; defaults to the
        previously hard-coded ``(8, 16, 32)``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``latent_dims`` with columns "indices"
        (feature indices sorted by decreasing importance),
        "features_importances" and "std" (both in original feature order,
        exactly as returned by ``feature_importances``).

    Notes
    -----
    NOTE(review): for an "ffvae" name with neither flag set, the raw
    prediction is passed on unchanged, as before — presumably that is a list
    of outputs; confirm that this combination is never used.
    """
    encoder_feature_importances = []
    is_ffvae = "ffvae" in name

    for latent_dim in latent_dims:
        # The models are only used for inference, so skip compilation; this
        # also avoids the "No training configuration found" warning.
        encoder = keras.models.load_model('{}_{}.h5'.format(name, latent_dim), compile=False)

        if "lstm" in name:
            new_dataset = encoder.predict(complete_dataset[:, :, np.newaxis])
        else:
            new_dataset = encoder.predict(complete_dataset)

        # Pick one output of a multi-output "ffvae" encoder when requested.
        if is_ffvae and z_mean:
            new_dataset = np.array(new_dataset)[0, :, :]
        elif is_ffvae and z_logsigma:
            new_dataset = np.array(new_dataset)[1, :, :]

        indices, importances, std = feature_importances(new_dataset, labels)
        encoder_feature_importances.append(
            pd.DataFrame({"indices": indices, "features_importances": importances, "std": std})
        )

    return encoder_feature_importances

In [22]:
 # Feature importances for the saved 'encoder_<size>.h5' models.
 encoder_feature_importance_ff = run_exp_for_feature_importance(
     name='encoder',
     complete_dataset=complete_dataset,
     labels=labels,
 )



No training configuration found in save file: the model was *not* compiled. Compile it manually.



InternalError: 2 root error(s) found.
  (0) Internal: Blas GEMM launch failed : a.shape=(32, 188), b.shape=(188, 64), m=32, n=64, k=188
	 [[{{node dense_12/MatMul}}]]
	 [[dense_13/Relu/_27]]
  (1) Internal: Blas GEMM launch failed : a.shape=(32, 188), b.shape=(188, 64), m=32, n=64, k=188
	 [[{{node dense_12/MatMul}}]]
0 successful operations.
0 derived errors ignored.

In [None]:
# Feature importances for the saved 'encoder_lstm_<size>.h5' models.
encoder_feature_importance_lstm = run_exp_for_feature_importance(
    name='encoder_lstm',
    complete_dataset=complete_dataset,
    labels=labels,
)

In [None]:
# Feature importances for the saved 'encoder_lstm_z_logsigma_<size>.h5' models.
encoder_feature_importance_lstm_sigma = run_exp_for_feature_importance(
    name='encoder_lstm_z_logsigma',
    complete_dataset=complete_dataset,
    labels=labels,
)

In [None]:
# Feature importances for the saved 'encoder_ffvae_<size>.h5' models,
# using output 1 of each prediction (z_logsigma=True).
encoder_feature_importance_ff_sigma = run_exp_for_feature_importance(
    name='encoder_ffvae',
    complete_dataset=complete_dataset,
    labels=labels,
    z_logsigma=True,
)

In [None]:
def _draw_ranked_importances(ax, df, panel_title=None):
    """Draw one bar chart of feature importances, most important bar first."""
    ranking = df["indices"].to_numpy()
    # "features_importances" and "std" are stored in original feature order,
    # so both must be reordered by the ranking to line up with the x values.
    heights = df["features_importances"].to_numpy()[ranking]
    errors = df["std"].to_numpy()[ranking]
    # An explicit `order` keeps the bars in rank order; without it seaborn
    # sorts numeric categories and the error bars no longer match the bars.
    sns.barplot(ax=ax, x=ranking, y=heights, order=list(ranking), yerr=errors)
    ax.set_xlabel("feature index (most important first)")
    ax.set_ylabel("importance")
    if panel_title is not None:
        ax.set_title(panel_title)


def plot_bar(encoder_features, encoder_features_vae, title, lstm=True):
    """Compare AE and VAE feature importances on a 3 x 2 grid of bar charts.

    Parameters
    ----------
    encoder_features : sequence of exactly three pandas.DataFrame
        Drawn in the left column, one frame per row.
    encoder_features_vae : sequence of exactly three pandas.DataFrame
        Drawn in the right column, one frame per row.
    title : str
        Figure-level title.
    lstm : bool, optional
        Selects the "LSTM ..." (default) or "FF ..." titles of the top row.

    Raises
    ------
    ValueError
        If either sequence does not unpack into exactly three frames.

    Notes
    -----
    Each frame is expected to have the columns built by
    ``run_exp_for_feature_importance``: "indices" holds feature indices
    sorted by decreasing importance, while "features_importances" and "std"
    are in original feature order. Heights and error bars are therefore both
    reordered by "indices" before plotting; previously only the error bars
    were, so bars showed the importance of the wrong feature.
    """
    fig, axes = plt.subplots(3, 2, figsize=(15, 15), sharey=True)
    fig.suptitle(title)

    ae_first, ae_second, ae_third = encoder_features
    vae_first, vae_second, vae_third = encoder_features_vae

    if lstm:
        column_titles = ("LSTM AE features", "LSTM VAE features")
    else:
        column_titles = ("FF AE features", "FF VAE features")

    frame_pairs = (
        (ae_first, vae_first),
        (ae_second, vae_second),
        (ae_third, vae_third),
    )
    for row, (ae_df, vae_df) in enumerate(frame_pairs):
        # Only the top row carries the column titles, as in the original layout.
        left_title = column_titles[0] if row == 0 else None
        right_title = column_titles[1] if row == 0 else None
        _draw_ranked_importances(axes[row, 0], ae_df, left_title)
        _draw_ranked_importances(axes[row, 1], vae_df, right_title)

In [None]:
# Feed-forward encoders: AE (left column) against VAE (right column).
plot_bar(
    encoder_features=encoder_feature_importance_ff,
    encoder_features_vae=encoder_feature_importance_ff_sigma,
    title="VAE vs AE ff",
    lstm=False,
)

In [None]:
# LSTM encoders: AE (left column) against VAE (right column).
plot_bar(
    encoder_features=encoder_feature_importance_lstm,
    encoder_features_vae=encoder_feature_importance_lstm_sigma,
    title="VAE vs AE LSTM",
)