In [None]:
# Import
import pandas as pd
import os
import numpy as np
from sklearn.metrics import pairwise_distances

from plotly.subplots import make_subplots
import plotly.graph_objects as go

from scipy.io import loadmat

from eeg_preictal_analysis import EEGPreictalAnalysis
# Single shared analysis object; the cells below call its methods through `eeg_class`.
eeg_class = EEGPreictalAnalysis()

# Feature group handed to eeg_class.prepare_feature_data4feat_reduction when the features are loaded.
feature_group = 'Multivariate' # one of: 'Multivariate', 'Univariate_linear', 'Univariate_nonlinear'

In [None]:
## Prepare data to run feature reduction
patient_index = 112802
seizure_index = 6
pat_seiz_data_folder = 'patient' + str(patient_index) + '_seizure' + str(seizure_index)

# get current working directory
wd = os.getcwd()

# define dictionary for saving prepared features (save_flag=1 to save and save_flag=0 otherwise)
save_dict = {'save_flag': 0, 'save_folder': ''}

# get time data (second return value is forwarded to the feature preparation below)
df_datetime_vector, indexes_5min_win2remove = eeg_class.prepare_time_data4feat_reduction(patient_index, seizure_index, pat_seiz_data_folder, save_dict)

# load patient information to get seizure onset date
eeg_onset_seizure = eeg_class.get_seizure_onset(patient_index, seizure_index)

df_time_min, df_time_h = eeg_class.get_time_data(df_datetime_vector, 'win_start_date', eeg_onset_seizure)


# unit of the time vector used to colour the plots: 'min' selects minutes, anything else the hour-based vector
get_time_vector = 'min'

if get_time_vector == 'min':
    time_vec_minutes = df_time_min['time_min']
    # negate so that values count down towards the seizure onset
    time_vector = time_vec_minutes*(-1)
    # read the first sample by position: a label lookup (`time_vector[0]`) fails or
    # returns another row whenever the index does not start at 0
    first_time = np.asarray(time_vector)[0]
    tickvals_lst = [-10, -50, -100, -150, -200, first_time]
    ticktext_lst = [-10, -50, -100, -150, -200, round(first_time)]
else:
    time_vector = eeg_class.update_time_hour_night_transition(df_time_h)
    # no hand-picked ticks exist for this time vector; define the names anyway so the
    # plotting cell does not hit a NameError
    # NOTE(review): None should leave the colorbar ticks to plotly's defaults - confirm
    tickvals_lst = None
    ticktext_lst = None

# prepare feature data for feature reduction
df_seizure_data, original_feat_names, constant_feat_names, quasi_constant_feat_names = eeg_class.prepare_feature_data4feat_reduction(pat_seiz_data_folder, feature_group, indexes_5min_win2remove, save_dict)
df_seizure_data.head()


In [None]:
## Apply feature reduction, apply clustering methods and evaluate clustering solutions

# Clustering configurations passed to eeg_class.perform_clustering.
# The `_K<n>` suffix presumably encodes the requested number of clusters - confirm in EEGPreictalAnalysis.
clust_methods = (
    ['KMEANS_K' + str(k) for k in range(2, 5)]
    + ['AGGLO_HIER_K' + str(k) for k in range(2, 5)]
    + ['HDBSCAN0']
    + ['GMM_K' + str(k) for k in range(2, 5)]
)

# Column names of the clustering-evaluation tables built in the t-SNE and UMAP cells.
clust_eval = 'clust_method n_clusters noisy_clusters n_samples_smaller_cluster DI SI OD CS C DBI'.split()

print(clust_eval)

In [1]:
# PCA feature reduction, one clustering run on the reduced data, and a side-by-side 3-D plot
df_pca, pca = eeg_class.pca_feature_reduction(df_seizure_data)

distances_matrix = pairwise_distances(df_pca)
clustering_method = 'HDBSCAN0'
clustering_solution = eeg_class.perform_clustering(df_pca, clustering_method)
eval_list = eeg_class.cluster_evaluation_indexes(df_pca, clustering_solution, distances_matrix)
# total variance ratio captured by the retained components
explained_variance = sum(pca.explained_variance_ratio_)

# human-readable method name for the subplot title
if clustering_method == 'HDBSCAN0':
    cluster_method = 'HDBSCAN'
elif clustering_method[0:-1] == 'AGGLO_HIER_K':
    cluster_method = 'Agglomerative Hierarchical K' + clustering_method[-1]
else:
    cluster_method = clustering_method.replace('_', ' ')


fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scene'}, {'type': 'scene'}]],
                    subplot_titles=('<b>a PCA feature reduction',
                                    '<b>b ' + cluster_method + ' clustering solution'),
                    shared_yaxes=False, shared_xaxes=False, horizontal_spacing=0.1)

# Colorbar for the time-coloured panel. The title uses the nested title=dict(text, side, font)
# form because the flat `titleside` / `titlefont` attributes are deprecated.
colorbar_opts = dict(len=1, x=-0.15, y=0.4, thickness=20, outlinewidth=1,
                     title=dict(text='<b>Time before seizure (' + get_time_vector + ')</b>',
                                side='right', font=dict(size=12)),
                     ticklabelposition='outside')
if get_time_vector == 'min':
    # the hand-picked tick arrays are only prepared for the minute-based time vector
    colorbar_opts.update(tickmode='array', tickvals=tickvals_lst, ticktext=ticktext_lst)

# left panel: PCA samples coloured by time before seizure
fig.add_trace(go.Scatter3d(x=df_pca['PC1'], y=df_pca['PC2'], z=df_pca['PC3'], mode='markers',
                           marker=dict(size=3, color=time_vector, colorscale='Viridis', showscale=True,
                                       colorbar=colorbar_opts)), row=1, col=1)

# right panel: same samples coloured by the clustering solution
fig.add_trace(go.Scatter3d(x=df_pca['PC1'], y=df_pca['PC2'], z=df_pca['PC3'], mode='markers',
                           marker=dict(size=3, color=clustering_solution, colorscale='Viridis')), row=1, col=2)


# tight layout
fig.update_layout(height=500, width=1000, margin=dict(l=100, r=200, b=20, t=70), title=dict(text='Patient ' + str(patient_index) + ', seizure ' + str(seizure_index), y=0.98, x=0.5, xanchor='center', yanchor='top'), template='plotly_white', showlegend=False)
fig.show()



NameError: name 'eeg_class' is not defined

In [None]:
# t-SNE: one embedding per perplexity value, each evaluated with every clustering method
list_perplexity = np.arange(10, 110, 10)
lst_string_perplexity = [str(value) for value in list_perplexity.tolist()]

# per-component stores: one array per perplexity value
tc1, tc2, tc3 = [], [], []
# one row per (perplexity, clustering method) pair
dic_tsne_clust_results = []

for perplexity in list_perplexity:
    df_tsne, array_tsne = eeg_class.tsne_feature_reduction(df_seizure_data, perplexity)

    # the first three embedding columns are kept separately
    tc1 += [array_tsne[:, 0]]
    tc2 += [array_tsne[:, 1]]
    tc3 += [array_tsne[:, 2]]

    # pairwise distances between samples, shared by all clustering evaluations of this embedding
    distances_matrix = pairwise_distances(df_tsne)

    for cm in clust_methods:
        clustering_solution = eeg_class.perform_clustering(df_tsne, cm)
        eval_list = eeg_class.cluster_evaluation_indexes(df_tsne, clustering_solution, distances_matrix)
        dic_tsne_clust_results.append([perplexity, cm.lower()] + eval_list)

# one column per perplexity (labelled with its string value), one row per sample
df_tc1 = pd.DataFrame(tc1, index=lst_string_perplexity).transpose()
df_tc2 = pd.DataFrame(tc2, index=lst_string_perplexity).transpose()
df_tc3 = pd.DataFrame(tc3, index=lst_string_perplexity).transpose()

tsne_result_columns = ['perplexity'] + clust_eval
df_tsne_clust_results = pd.DataFrame(dic_tsne_clust_results, columns=tsne_result_columns)


In [None]:
# UMAP: one embedding per (n_neighbours, min_dist) pair, each evaluated with every clustering method
n_neighbours_vec = np.arange(10, 110, 10)
min_dist_vec = np.round(np.arange(0.1, 1.0, 0.1), 2)

# per-component stores: one array per parameter pair
uc1, uc2, uc3 = [], [], []
# one row per (n_neighbours, min_dist, clustering method) combination
dic_umap_clust_results = []
# column labels "<n_neighbours>_<min_dist>", in the order the embeddings are computed
lst_header = []

for n_neighbours in n_neighbours_vec:
    for min_dist in min_dist_vec:
        lst_header.append('_'.join([str(n_neighbours), str(min_dist)]))

        df_umap, array_umap = eeg_class.umap_feature_reduction(df_seizure_data, n_neighbours, min_dist)

        # the first three embedding columns are kept separately
        uc1 += [array_umap[:, 0]]
        uc2 += [array_umap[:, 1]]
        uc3 += [array_umap[:, 2]]

        # pairwise distances between samples, shared by all clustering evaluations of this embedding
        distances_matrix = pairwise_distances(df_umap)

        for cm in clust_methods:
            clustering_solution = eeg_class.perform_clustering(df_umap, cm)
            eval_list = eeg_class.cluster_evaluation_indexes(df_umap, clustering_solution, distances_matrix)
            dic_umap_clust_results.append([n_neighbours, min_dist, cm.lower()] + eval_list)

# one column per parameter pair, one row per sample
df_uc1 = pd.DataFrame(uc1, index=lst_header).transpose()
df_uc2 = pd.DataFrame(uc2, index=lst_header).transpose()
df_uc3 = pd.DataFrame(uc3, index=lst_header).transpose()

umap_result_columns = ['n_neighbors', 'min_dist'] + clust_eval
df_umap_clust_results = pd.DataFrame(dic_umap_clust_results, columns=umap_result_columns)