In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import mne
import entropy as ent
import neurokit2 as nk
from scipy.signal import welch
from nolds import lyap_r
import antropy as ant
import joblib

In [8]:
participantsInfo = pd.read_table('./ds004504/participants.tsv')
#Getting all the groups
A_sub = participantsInfo[participantsInfo["Group"] == "A"]["participant_id"].tolist()
C_sub = participantsInfo[participantsInfo["Group"] == "C"]["participant_id"].tolist()
D_sub = participantsInfo[participantsInfo["Group"] == "F"]["participant_id"].tolist()

In [9]:
freq_bands = {
    "Delta": (0.5, 4),
    "Theta": (4, 8),
    "Alpha": (8, 12),
    "Beta": (12, 30),
}

import pandas as pd
import numpy as np 

#this generates the dataframe persubject
def generate_subject_dataframe(subjects) -> pd.DataFrame:
    """
    Generates a DataFrame where each subject has 19 channels, 
    and each channel has 4 bands (Delta, Theta, Alpha, Beta).
    
    Parameters:
        num_subjects (int): Number of subjects to include in the DataFrame.
    
    Returns:
        pd.DataFrame: A DataFrame with hierarchical indexing for subjects, channels, and bands for each window.
    """
    # subjects = [f"sub-{i:03d}" for i in range(start_sub, end_sub + 1)]  # Format subject IDs as sub-XXX
    channels = ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz']
    bands = ["Delta", "Theta", "Alpha", "Beta"]
    
    data = []
    for subject in subjects:
        for channel in channels:
            for band, val in freq_bands.items():
                value = np.random.rand()  # Example random value, replace with actual data if needed
                data.append([subject, channel, band, []])
    
    df = pd.DataFrame(data, columns=["Subject", "Channel", "Band", "Window Avg"])

    #the .setIndex is creating a multiIndex DataFrame
    return df.set_index(["Subject", "Channel", "Band"])


    # Example usage:
# df = generate_subject_dataframe(5)  # Generates a DataFrame for 5 subjects
# print(df)

In [10]:
def derivativesPath(sub): #dericatives is the preprocessed data
    return f"ds004504/{sub}/eeg/{sub}_task-eyesclosed_eeg.set"



# NUM_SUBJECTS = 2
# # Use the function inside the groups dictionary
# groups = {
#     'Alzeimers': generate_subject_dataframe(A_sub[0:NUM_SUBJECTS]),
#     'Control': generate_subject_dataframe(C_sub[0:NUM_SUBJECTS]),
#     'Dementia': generate_subject_dataframe(D_sub[0:NUM_SUBJECTS]),
# }

# NUM_SUBJECTS = 2
# # Use the function inside the groups dictionary
groups = {
    'Alzeimers': generate_subject_dataframe(A_sub),
    'Control': generate_subject_dataframe(C_sub),
    'Dementia': generate_subject_dataframe(D_sub),
}

# Print the DataFrame for 'Alzeimers'

# WINDOW_LENGTH = 1.5
# STEP_SIZE = .75 #50% sliding window
WINDOW_LENGTH = 3
STEP_SIZE = 1.5

In [11]:
print(A_sub[0:NUM_SUBJECTS])

NameError: name 'NUM_SUBJECTS' is not defined

In [13]:
print(groups['Alzeimers'])
print(groups['Control'])
print(groups['Dementia'])

                      Window Avg
Subject Channel Band            
sub-001 Fp1     Delta         []
                Theta         []
                Alpha         []
                Beta          []
        Fp2     Delta         []
...                          ...
sub-036 Cz      Beta          []
        Pz      Delta         []
                Theta         []
                Alpha         []
                Beta          []

[2736 rows x 1 columns]
                      Window Avg
Subject Channel Band            
sub-037 Fp1     Delta         []
                Theta         []
                Alpha         []
                Beta          []
        Fp2     Delta         []
...                          ...
sub-065 Cz      Beta          []
        Pz      Delta         []
                Theta         []
                Alpha         []
                Beta          []

[2204 rows x 1 columns]
                      Window Avg
Subject Channel Band            
sub-066 Fp1     Delta     

# ** this is the part where we would do a for loop per group, per subject in that group **


In [14]:
#TODO: add more different types of data and more subjects ,
#Make it run faster by multiprocessing

In [15]:
import time
global_start = time.time()
#** how to run this in parallel, speciall the computer PSD for each epoch
for group in groups:
    print(f'Current Group: {group}')
    df = groups[group]

    subjects = df.index.get_level_values("Subject").unique()
    for subject in subjects:
        subject_start_time = time.time()
        raw_sub = mne.io.read_raw_eeglab(derivativesPath(subject), preload=True)
        sfreq = raw_sub.info['sfreq']
        
        # Create epochs with a fixed window size
        print(f'\tCurrent Subject: {subject}')

        start_times = np.arange(0, raw_sub.times[-1] - WINDOW_LENGTH, STEP_SIZE) 
        events = np.array([
        [int(t * sfreq), 0, 1] for t in start_times  # MNE event format
        ]) 
        
        epochs = mne.Epochs(
            raw_sub, events, event_id=1,
            tmin=0, tmax=WINDOW_LENGTH,
            baseline=None, detrend=1, preload=True, verbose=False
        )

        channel_names = raw_sub.info['ch_names']

        for x in range(len(epochs)):
            pass
            epoch = epochs[x]
            epoch_psd = epoch.compute_psd(fmin=freq_bands["Delta"][0], fmax=freq_bands["Beta"][1], verbose=False) #computing the max PSD between the wave lengths
            #print(f'Current epock: {x}') 
            for channel_name in channel_names:
                        pass
                        # print(f'\t\tCurrent channel: {channel_name}')
                        for band, (band_low, band_high) in freq_bands.items():   
                            pass
                            #print(f'Current band: {band}')
                            channel_at_freq = epoch_psd.get_data(picks=channel_name , fmin=band_low, fmax=band_high)
                            channel_at_freq_avg = channel_at_freq.mean(axis=-1)[0][0] # am I computing the average correctly ? 
                            groups[group].loc[(subject, channel_name, band), "Window Avg"].append(channel_at_freq_avg)
        print(time.time() - subject_start_time)


print(time.time() - global_start)

Current Group: Alzeimers
	Current Subject: sub-001
5.873037099838257
	Current Subject: sub-002
9.155900001525879
	Current Subject: sub-003
2.3059802055358887
	Current Subject: sub-004
7.530365943908691
	Current Subject: sub-005
9.29798674583435
	Current Subject: sub-006
6.575322866439819
	Current Subject: sub-007
8.705209016799927
	Current Subject: sub-008
9.283087968826294
	Current Subject: sub-009
6.244942665100098
	Current Subject: sub-010
24.101158142089844
	Current Subject: sub-011
9.617300987243652
	Current Subject: sub-012
13.441014051437378
	Current Subject: sub-013
10.524911880493164
	Current Subject: sub-014
13.915184020996094
	Current Subject: sub-015
13.013603925704956
	Current Subject: sub-016
15.790639162063599
	Current Subject: sub-017
10.311507225036621
	Current Subject: sub-018
10.183284044265747
	Current Subject: sub-019
13.21225094795227
	Current Subject: sub-020
10.684706926345825
	Current Subject: sub-021
13.081006050109863
	Current Subject: sub-022
9.7406020164489

In [18]:
print(groups['Control'].loc[('sub-037', 'Fp1', 'Delta'), "Window Avg"])

[np.float64(1.399211858791581e-07), np.float64(1.4053479315283195e-07), np.float64(6.098015367755147e-08), np.float64(3.69258777982178e-08), np.float64(5.0237017686608855e-08), np.float64(9.375328776744823e-08), np.float64(2.938769738012508e-08), np.float64(4.804709461619144e-08), np.float64(7.820311073265787e-08), np.float64(1.0964640663924213e-07), np.float64(8.023016548886901e-08), np.float64(2.2195450565198713e-07), np.float64(1.3746962021093614e-07), np.float64(1.0982572269028044e-07), np.float64(3.6194514543656036e-07), np.float64(2.3505523453089784e-07), np.float64(8.1623448013982e-08), np.float64(8.155809561180829e-08), np.float64(7.171596169525016e-08), np.float64(8.689382458602037e-08), np.float64(5.233096427031352e-08), np.float64(1.281425696087604e-07), np.float64(1.07663186139329e-07), np.float64(8.847116830747788e-08), np.float64(8.614358984253707e-08), np.float64(8.152626598443409e-08), np.float64(8.202844511901899e-08), np.float64(1.0002875688602628e-07), np.float64(2.2

In [19]:
# Print Alzheimer's preview
print(f"Alzheimer's preview for {A_sub[0]}")
print(groups["Alzeimers"].loc[A_sub[0]]) 

# Print Control preview
print(f"Control preview {C_sub[0]}")
print(groups["Control"].loc[C_sub[0]])  

# Print Dementia preview
print(f"Dementia preview {D_sub[0]}")
print(groups["Dementia"].loc[D_sub[0]])  

Alzheimer's preview for sub-001
                                                      Window Avg
Channel Band                                                    
Fp1     Delta  [5.607240819161222e-07, 2.136206557103639e-07,...
        Theta  [1.4290601340850907e-08, 2.322876797863479e-08...
        Alpha  [2.877208402057422e-09, 1.7605723328087318e-09...
        Beta   [5.046764330104262e-10, 5.503217253688624e-10,...
Fp2     Delta  [5.465781151565962e-07, 2.1364434568508706e-07...
...                                                          ...
Cz      Beta   [3.205899994916665e-10, 3.5335248547358265e-10...
Pz      Delta  [1.5562185234722875e-07, 2.6102165698186886e-0...
        Theta  [4.1626502960822e-09, 7.189374463466653e-09, 6...
        Alpha  [1.2805820794756784e-09, 1.1287029520125796e-0...
        Beta   [3.338654724073159e-10, 3.9571401318770685e-10...

[76 rows x 1 columns]
Control preview sub-037
                                                      Window Avg
Channel Ban

In [None]:
# saving the dataframes
groups['Alzeimers'].to_csv("Alzeimers.csv")
groups['Dementia'].to_csv("Dementia.csv")
groups['Control'].to_csv("Control.csv")


In [91]:
# ** next is PCA analysis , 
#step 1 , split the data into training and testing
#step 2 , Normalize the data
#step 3 , do PCA  ... looks really simple actually ? weird .. maybe do 2 pca's one for H to D and one for H to A and maybe H to not H