In [5]:
# import pyforest
import mne
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

import numpy as np
from scipy.spatial.distance import pdist
from scipy.spatial import distance
from scipy.sparse import lil_matrix
from scipy.spatial.distance import cdist
from scipy import linalg
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import SpectralClustering

## .EDF to DATAFRAME and ANNOTATE (add LABELS)

In [6]:
def edf_to_df_list(folder_path):
    
    # Get a list of all .edf files in the specified folder
    edf_files = [f for f in os.listdir(folder_path) if f.endswith('.edf')]
    
    # Create an empty list to store DataFrames
    temp_eeg_dataframe_list = []
    eeg_dataframe_list = []
    annotations_dataframe_list = []

    for edf_file in edf_files:
        # Construct the full file path for each .edf file
        edf_file_path = os.path.join(folder_path, edf_file)

        # Read EEG data from the .edf file
        raw_edf_data = mne.io.read_raw_edf(edf_file_path, preload=True)

        # Create Dataframe of EEG data and append to List 
        temp_eeg_dataframe_list.append(raw_edf_data.to_data_frame())

        # Create Dataframe of EEG data annotations and append to List 
        annotations_dataframe_list.append(raw_edf_data.annotations.to_data_frame())

    for df in temp_eeg_dataframe_list:
        
        mask = (df.drop(columns=["time"]) == 0).all(axis=1)
        eeg_dataframe_list.append(df[~mask])
        

    return eeg_dataframe_list, annotations_dataframe_list, edf_files

In [7]:
def annotate_eeg_df(eeg_df, annotations_df):

    tem_eeg_df = eeg_df.copy()
    
    subset_df_list = []

    previous_duration = 0.0
    
    for index, row in annotations_df.iterrows():
        
        duration = row.duration
    
        # Add the previous duration to the current duration
        duration += previous_duration
    
        # Find the subset of new_df based on the current duration
        subset_df = tem_eeg_df[tem_eeg_df.time <= duration]
    
        # Label the subset with the annotation description
        subset_df['Class'] = row.description
        subset_df_list.append(subset_df)
    
        # Remove the labeled subset from new_df
        tem_eeg_df = tem_eeg_df.drop(subset_df.index)
    
        # Update previous_duration for the next iteration
        previous_duration = duration
        
        # Print the result if needed
        # print(f"Processed annotation {index}: Duration = {duration}, Description = {row.description}")
    
    combined_df = pd.concat(subset_df_list, ignore_index = True)

    return combined_df

In [8]:
folder_path = r'.\files\S001'
edf_df_list, annotations_df_list, edf_files = edf_to_df_list(folder_path)

Extracting EDF parameters from C:\Users\mondal\Desktop\eeg_eye_movement\files\S001\S001R01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 9759  =      0.000 ...    60.994 secs...
Extracting EDF parameters from C:\Users\mondal\Desktop\eeg_eye_movement\files\S001\S001R02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 9759  =      0.000 ...    60.994 secs...
Extracting EDF parameters from C:\Users\mondal\Desktop\eeg_eye_movement\files\S001\S001R03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from C:\Users\mondal\Desktop\eeg_eye_movement\files\S001\S001R04.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from C:\Users\mondal\Desktop

In [9]:
len(edf_df_list)

14

In [10]:
len(annotations_df_list)

14

In [11]:
annotated_eeg_df_list = []

for eeg_df, annotation_df in zip(edf_df_list, annotations_df_list):

    annotated_eeg_df_list.append(annotate_eeg_df(eeg_df, annotation_df))

    

In [12]:
for df, file_name in zip(annotated_eeg_df_list, edf_files):

    df["Id"] = file_name.split(".")[0] 

In [13]:
annotated_eeg_df_list[1]

Unnamed: 0,time,Fc5.,Fc3.,Fc1.,Fcz.,Fc2.,Fc4.,Fc6.,C5..,C3..,...,Po3.,Poz.,Po4.,Po8.,O1..,Oz..,O2..,Iz..,Class,Id
0,0.00000,-46.0,-41.0,-32.0,-24.0,-23.0,-24.0,-15.0,-41.0,-29.0,...,23.0,50.0,69.0,52.0,54.0,40.0,108.0,55.0,T0,S001R02
1,0.00625,-54.0,-48.0,-34.0,-21.0,-22.0,-31.0,-26.0,-36.0,-32.0,...,38.0,64.0,67.0,46.0,63.0,64.0,114.0,74.0,T0,S001R02
2,0.01250,-58.0,-53.0,-41.0,-28.0,-31.0,-40.0,-35.0,-37.0,-34.0,...,27.0,53.0,50.0,28.0,78.0,84.0,119.0,98.0,T0,S001R02
3,0.01875,-60.0,-58.0,-47.0,-29.0,-31.0,-42.0,-34.0,-45.0,-41.0,...,16.0,32.0,26.0,-3.0,72.0,71.0,99.0,87.0,T0,S001R02
4,0.02500,-33.0,-27.0,-14.0,6.0,5.0,-4.0,12.0,-16.0,-17.0,...,5.0,10.0,8.0,-29.0,50.0,37.0,55.0,52.0,T0,S001R02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9627,60.16875,-31.0,-13.0,-2.0,15.0,5.0,13.0,3.0,-29.0,-18.0,...,-78.0,-72.0,-40.0,-27.0,-96.0,-83.0,-62.0,-72.0,T0,S001R02
9628,60.17500,-12.0,8.0,11.0,21.0,7.0,6.0,-4.0,2.0,3.0,...,-71.0,-67.0,-43.0,-31.0,-100.0,-83.0,-62.0,-62.0,T0,S001R02
9629,60.18125,33.0,37.0,29.0,31.0,9.0,5.0,-11.0,49.0,31.0,...,-55.0,-65.0,-56.0,-54.0,-92.0,-79.0,-67.0,-54.0,T0,S001R02
9630,60.18750,53.0,58.0,54.0,59.0,34.0,23.0,-8.0,57.0,45.0,...,-36.0,-67.0,-63.0,-57.0,-80.0,-87.0,-91.0,-55.0,T0,S001R02


In [14]:
final_df = pd.concat(annotated_eeg_df_list)

In [15]:
final_df

Unnamed: 0,time,Fc5.,Fc3.,Fc1.,Fcz.,Fc2.,Fc4.,Fc6.,C5..,C3..,...,Po3.,Poz.,Po4.,Po8.,O1..,Oz..,O2..,Iz..,Class,Id
0,0.00000,-16.0,-29.0,2.0,22.0,-12.0,-23.0,-46.0,-36.0,-26.0,...,-52.0,-35.0,-22.0,-33.0,-53.0,-21.0,-11.0,15.0,T0,S001R01
1,0.00625,-56.0,-54.0,-27.0,-4.0,-31.0,-36.0,-56.0,-75.0,-55.0,...,-29.0,-18.0,-3.0,-9.0,-53.0,-12.0,1.0,21.0,T0,S001R01
2,0.01250,-55.0,-55.0,-29.0,-5.0,-29.0,-34.0,-52.0,-53.0,-42.0,...,-12.0,-6.0,4.0,-7.0,-45.0,2.0,18.0,35.0,T0,S001R01
3,0.01875,-50.0,-44.0,-13.0,13.0,-16.0,-25.0,-45.0,-44.0,-21.0,...,4.0,14.0,20.0,7.0,-29.0,16.0,35.0,47.0,T0,S001R01
4,0.02500,-36.0,-28.0,13.0,42.0,9.0,-10.0,-46.0,-34.0,-12.0,...,6.0,20.0,24.0,6.0,-13.0,29.0,40.0,50.0,T0,S001R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19915,124.46875,100.0,100.0,99.0,103.0,98.0,95.0,82.0,121.0,110.0,...,92.0,94.0,47.0,24.0,89.0,77.0,-6.0,37.0,T2,S001R14
19916,124.47500,117.0,128.0,130.0,124.0,119.0,122.0,106.0,133.0,131.0,...,87.0,96.0,68.0,63.0,78.0,74.0,7.0,43.0,T2,S001R14
19917,124.48125,122.0,142.0,147.0,141.0,136.0,144.0,135.0,124.0,130.0,...,86.0,106.0,107.0,110.0,71.0,82.0,44.0,56.0,T2,S001R14
19918,124.48750,116.0,135.0,142.0,142.0,129.0,124.0,90.0,120.0,132.0,...,88.0,99.0,104.0,96.0,72.0,81.0,53.0,44.0,T2,S001R14


#### Spectral Clustering

In [17]:
data_df = final_df.drop(["time","Class","Id"], axis=1)
data_df

Unnamed: 0,Fc5.,Fc3.,Fc1.,Fcz.,Fc2.,Fc4.,Fc6.,C5..,C3..,C1..,...,P8..,Po7.,Po3.,Poz.,Po4.,Po8.,O1..,Oz..,O2..,Iz..
0,-16.0,-29.0,2.0,22.0,-12.0,-23.0,-46.0,-36.0,-26.0,-18.0,...,-30.0,-56.0,-52.0,-35.0,-22.0,-33.0,-53.0,-21.0,-11.0,15.0
1,-56.0,-54.0,-27.0,-4.0,-31.0,-36.0,-56.0,-75.0,-55.0,-43.0,...,-20.0,-35.0,-29.0,-18.0,-3.0,-9.0,-53.0,-12.0,1.0,21.0
2,-55.0,-55.0,-29.0,-5.0,-29.0,-34.0,-52.0,-53.0,-42.0,-35.0,...,-20.0,-23.0,-12.0,-6.0,4.0,-7.0,-45.0,2.0,18.0,35.0
3,-50.0,-44.0,-13.0,13.0,-16.0,-25.0,-45.0,-44.0,-21.0,-11.0,...,-16.0,-12.0,4.0,14.0,20.0,7.0,-29.0,16.0,35.0,47.0
4,-36.0,-28.0,13.0,42.0,9.0,-10.0,-46.0,-34.0,-12.0,8.0,...,-9.0,-7.0,6.0,20.0,24.0,6.0,-13.0,29.0,40.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19915,100.0,100.0,99.0,103.0,98.0,95.0,82.0,121.0,110.0,115.0,...,40.0,74.0,92.0,94.0,47.0,24.0,89.0,77.0,-6.0,37.0
19916,117.0,128.0,130.0,124.0,119.0,122.0,106.0,133.0,131.0,134.0,...,64.0,68.0,87.0,96.0,68.0,63.0,78.0,74.0,7.0,43.0
19917,122.0,142.0,147.0,141.0,136.0,144.0,135.0,124.0,130.0,138.0,...,85.0,65.0,86.0,106.0,107.0,110.0,71.0,82.0,44.0,56.0
19918,116.0,135.0,142.0,142.0,129.0,124.0,90.0,120.0,132.0,138.0,...,64.0,71.0,88.0,99.0,104.0,96.0,72.0,81.0,53.0,44.0


In [None]:
class_labels, class_mappings = pd.factorize(final_df["Class"])
labels = class_labels.tolist()
len(labels)

In [None]:
# Pairwise distances
dimension = data_df.shape[0]

dist_mat = lil_matrix((dimension, dimension), dtype=float)

for i in range(dimension):
    for j in range(i+1, dimension):
        d = distance.euclidean(data_df.iloc[i], data_df.iloc[j])
        dist_mat[i, j] = d
        dist_mat[j, i] = d

In [19]:
from joblib import Parallel, delayed

# Number of CPU cores to use for parallelization
n_cores = 4  # Adjust this to match the number of available CPU cores

# Calculate pairwise distances in parallel
def calculate_distance(i, j):
    d = distance.euclidean(data_df.iloc[i], data_df.iloc[j])
    return d

dimension = data_df.shape[0]
dist_mat = Parallel(n_jobs=n_cores)(delayed(calculate_distance)(i, j) for i in range(dimension) for j in range(i+1, dimension))

# Reshape the results into a distance matrix
dist_mat = np.array(dist_mat).reshape((dimension, dimension))

KeyboardInterrupt: 