In [1]:
# Import
import pandas as pd
import os
import numpy as np
from sklearn.metrics import pairwise_distances
import plotly.graph_objects as go
from scipy.io import loadmat

import dash
from dash import dcc, html, Input, Output
import plotly.express as px

from eeg_preictal_analysis import EEGPreictalAnalysis
# Feature family analysed by this notebook; one of
# 'Multivariate', 'Univariate_linear' or 'Univariate_nonlinear'.
feature_group = 'Multivariate'

# Shared helper instance; the cells below call its methods for data
# preparation, feature reduction, clustering and cluster evaluation.
eeg_class = EEGPreictalAnalysis()

In [2]:
## Prepare data to run feature reduction
# Patient/seizure pair under analysis and the folder name derived from it.
patient_index = 112802
seizure_index = 6
pat_seiz_data_folder = 'patient' + str(patient_index) + '_seizure' + str(seizure_index)

# get current working directory
wd = os.getcwd()

# define dictionary for saving prepared features (save_flag=1 to save and save_flag=0 otherwise)
save_dict = {'save_flag': 0, 'save_folder': ''}

# get time data (the second return value is forwarded to the feature preparation below)
df_datetime_vector, indexes_5min_win2remove = eeg_class.prepare_time_data4feat_reduction(
    patient_index, seizure_index, pat_seiz_data_folder, save_dict)

# load patient information to get seizure onset date
eeg_onset_seizure = eeg_class.get_seizure_onset(patient_index, seizure_index)

df_time_min, df_time_h = eeg_class.get_time_data(df_datetime_vector, 'win_start_date', eeg_onset_seizure)

# 1 -> colour samples by (negated) minutes; anything else -> use the hour-based vector
get_minute_time_vector = 1

# Always define the colour-bar tick settings so the plotting cell does not raise
# NameError in hour mode. None leaves tick placement to plotly
# (NOTE(review): confirm the hour-mode colour bar looks as intended).
tickvals_lst = None
ticktext_lst = None

if get_minute_time_vector == 1:
    time_vec_minutes = df_time_min['time_min']
    time_vector = time_vec_minutes*(-1)
    # .iloc[0] picks the first sample by position; plain [0] is a label lookup and
    # fails when the index does not contain the label 0.
    # assumes df_time_min['time_min'] is a pandas Series — TODO confirm
    tickvals_lst = [-10, -50, -100, -150, -200, time_vector.iloc[0]]
    ticktext_lst = [-10, -50, -100, -150, -200, round(time_vector.iloc[0])]
else:
    time_vector = eeg_class.update_time_hour_night_transition(df_time_h)

# prepare feature data for feature reduction
df_seizure_data, original_feat_names, constant_feat_names, quasi_constant_feat_names = eeg_class.prepare_feature_data4feat_reduction(
    pat_seiz_data_folder, feature_group, indexes_5min_win2remove, save_dict)

# last expression of the cell -> rendered as the cell output
df_seizure_data.head()


Unnamed: 0,Circular_correlation_theta_MS,WPLI_theta_A,Circular_correlation_alpha_A,Circular_correlation_gamma4_M,corr_max_xcorr_gamma1_MS,PSI_theta_M,PSI_beta_M,WPLI_delta_WGCC,dWPLI_theta_M,corr_max_xcorr_beta_MS,...,corr_max_xcorr_power_gamma1_MS,dWPLI_alpha_MS,ISPC_gamma1_M,WPLI_gamma2_CPL,Circular_correlation_gamma3_MCC,Circular_correlation_gamma2_GE,Circular_correlation_theta_WGCC,ISPC_gamma3_WGCC,ISPC_theta_M,ISPC_gamma2_WGCC
0,0.386524,0.875039,0.092866,-0.012882,-0.44717,0.002367,-0.022345,-0.586324,-0.278149,-0.690156,...,-0.540962,-0.695899,0.910444,-0.535316,1.028426,-0.038967,0.544772,-0.885291,0.463175,-0.196247
1,-1.248267,0.883137,0.534454,0.375969,-0.618571,0.002286,-0.024906,1.426013,1.463987,-0.440878,...,-0.40344,-0.361738,0.136503,-0.174264,1.185835,-0.162812,-1.157076,-0.885291,1.433422,-0.56199
2,-1.139668,0.858192,1.613167,1.286631,-0.360051,0.00222,-0.021949,-1.347149,1.37124,-0.81852,...,-0.470733,-0.539755,0.939651,0.286355,1.300484,-0.759442,-1.055721,-0.885291,1.683544,-0.911392
3,-0.353413,0.156531,0.62869,-0.650807,-0.534277,0.002248,-0.022954,-0.924154,-0.002048,-0.56964,...,-0.540962,-0.695899,0.863091,-1.174883,-0.320576,-0.040507,-0.600721,-0.232456,0.533717,-0.463163
4,-0.632802,-0.308169,-1.228111,0.351604,-0.532044,0.002354,-0.022189,-0.140931,0.624035,-0.815843,...,-0.540962,0.457747,0.102679,-0.677033,0.384539,0.275331,-0.721538,-0.410362,1.450525,0.320196


In [3]:
## Apply feature reduction, apply clustering methods and evaluate clustering solutions

# Clustering method keys; the sweep cells pass them one by one to
# eeg_class.perform_clustering.
clust_methods = [
    'KMEANS_K2', 'KMEANS_K3', 'KMEANS_K4',
    'AGGLO_HIER_K2', 'AGGLO_HIER_K3', 'AGGLO_HIER_K4',
    'HDBSCAN0',
    'GMM_K2', 'GMM_K3', 'GMM_K4',
]

# Column names of the clustering result tables: the method name followed by
# the values returned by eeg_class.cluster_evaluation_indexes.
clust_eval = [
    'clust_method',
    'n_clusters',
    'noisy_clusters',
    'n_samples_smaller_cluster',
    'DI',
    'SI',
    'OD',
    'CS',
    'C',
    'DBI',
]

print(clust_eval)

['clust_method', 'n_clusters', 'noisy_clusters', 'n_samples_smaller_cluster', 'DI', 'SI', 'OD', 'CS', 'C', 'DBI']


In [None]:
# PCA: reduce the prepared features, cluster the projection and plot PC1-PC3
df_pca, pca = eeg_class.pca_feature_reduction(df_seizure_data)

# pairwise distances between samples in the PCA space, used by the evaluation indexes
distances_matrix = pairwise_distances(df_pca)

# NOTE(review): 'DBSCAN_D3' is not one of the keys listed in clust_methods;
# confirm that perform_clustering accepts it.
clustering_method = 'DBSCAN_D3'
clustering_solution = eeg_class.perform_clustering(df_pca, clustering_method)
eval_list = eeg_class.cluster_evaluation_indexes(df_pca, clustering_solution, distances_matrix)

# sum of the explained-variance ratios reported by the fitted PCA object
explained_variance = sum(pca.explained_variance_ratio_)

# 3D scatter of the first three components, coloured by time_vector.
# The colour-bar title uses the nested title=dict(text, side, font) form instead of
# the deprecated titleside/titlefont attributes, and the bold tag is now closed.
fig = go.Figure(data=[go.Scatter3d(
    x=df_pca['PC1'], y=df_pca['PC2'], z=df_pca['PC3'],
    mode='markers',
    marker=dict(
        size=3,
        color=time_vector,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            len=0.3, x=0, y=0.84,
            thickness=20,
            outlinewidth=1,
            title=dict(text='<b>Time before seizure (min)</b>',
                       side='right',
                       font=dict(size=12)),
            tickmode='array',
            tickvals=tickvals_lst, ticktext=ticktext_lst,
            ticklabelposition='outside')))])

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()



Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


In [None]:
# t-SNE: sweep the perplexity, keep every embedding and evaluate each clustering method on it
list_perplexity = np.arange(10, 110, 10)

# tc1/tc2/tc3 collect embedding columns 0/1/2, one entry per perplexity value
tc1, tc2, tc3 = [], [], []
# one row per (perplexity, clustering method) pair
dic_tsne_clust_results = []

for perplexity in list_perplexity:
    df_tsne, array_tsne = eeg_class.tsne_feature_reduction(df_seizure_data, perplexity)

    tc1.append(array_tsne[:, 0])
    tc2.append(array_tsne[:, 1])
    tc3.append(array_tsne[:, 2])

    # sample-to-sample distances in the embedded space; computed once per
    # embedding and reused for every clustering method
    distances_matrix = pairwise_distances(df_tsne)

    for cm in clust_methods:
        clustering_solution = eeg_class.perform_clustering(df_tsne, cm)
        eval_list = eeg_class.cluster_evaluation_indexes(df_tsne, clustering_solution, distances_matrix)
        dic_tsne_clust_results.append([perplexity, cm.lower()] + eval_list)

# column labels: the perplexity values as strings
lst_string_perplexity = [str(value) for value in list_perplexity.tolist()]

# one column per perplexity value, one row per sample
df_tc1 = pd.DataFrame(tc1, index=lst_string_perplexity).T
df_tc2 = pd.DataFrame(tc2, index=lst_string_perplexity).T
df_tc3 = pd.DataFrame(tc3, index=lst_string_perplexity).T

df_tsne_clust_results = pd.DataFrame(dic_tsne_clust_results, columns=['perplexity', *clust_eval])


In [None]:
# UMAP: sweep n_neighbours x min_dist, keep every embedding and evaluate each clustering method on it
n_neighbours_vec = np.arange(10, 110, 10)
min_dist_vec = np.round(np.arange(0.1, 1.0, 0.1), 2)

# uc1/uc2/uc3 collect embedding columns 0/1/2, one entry per parameter pair
uc1, uc2, uc3 = [], [], []
# one row per (n_neighbours, min_dist, clustering method) combination
dic_umap_clust_results = []
# '<n_neighbours>_<min_dist>' labels, in the same order as the collected embeddings
lst_header = []

for n_neighbours in n_neighbours_vec:
    for min_dist in min_dist_vec:
        lst_header.append('_'.join([str(n_neighbours), str(min_dist)]))

        df_umap, array_umap = eeg_class.umap_feature_reduction(df_seizure_data, n_neighbours, min_dist)

        uc1.append(array_umap[:, 0])
        uc2.append(array_umap[:, 1])
        uc3.append(array_umap[:, 2])

        # sample-to-sample distances in the embedded space; computed once per
        # embedding and reused for every clustering method
        distances_matrix = pairwise_distances(df_umap)

        for cm in clust_methods:
            clustering_solution = eeg_class.perform_clustering(df_umap, cm)
            eval_list = eeg_class.cluster_evaluation_indexes(df_umap, clustering_solution, distances_matrix)
            dic_umap_clust_results.append([n_neighbours, min_dist, cm.lower()] + eval_list)

# one column per parameter pair, one row per sample
df_uc1 = pd.DataFrame(uc1, index=lst_header).T
df_uc2 = pd.DataFrame(uc2, index=lst_header).T
df_uc3 = pd.DataFrame(uc3, index=lst_header).T

df_umap_clust_results = pd.DataFrame(dic_umap_clust_results, columns=['n_neighbors', 'min_dist', *clust_eval])