In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import mne

In [235]:
import entropy as ent
import neurokit2 as nk
from scipy.signal import welch
from nolds import lyap_r
import antropy as ant
import joblib

In [57]:
participantsInfo = pd.read_table('./ds004504/participants.tsv')

In [125]:
#Getting all the groups
A_sub = participantsInfo[participantsInfo["Group"] == "A"]["participant_id"].tolist()
C_sub = participantsInfo[participantsInfo["Group"] == "C"]["participant_id"].tolist()
F_sub = participantsInfo[participantsInfo["Group"] == "F"]["participant_id"].tolist()

In [141]:
freq_bands = {
    "Delta": (0.5, 4),
    "Theta": (4, 8),
    "Alpha": (8, 12),
    "Beta": (12, 30),
}

import pandas as pd
import numpy as np 

def generate_subject_dataframe(num_subjects: int) -> pd.DataFrame:
    """
    Generates a DataFrame where each subject has 19 channels, 
    and each channel has 4 bands (Delta, Theta, Alpha, Beta).
    
    Parameters:
        num_subjects (int): Number of subjects to include in the DataFrame.
    
    Returns:
        pd.DataFrame: A DataFrame with hierarchical indexing for subjects, channels, and bands for each window.
    """
    subjects = [f"sub-{i+1:03d}" for i in range(num_subjects)]  # Format subject IDs as sub-XXX
    # channels = [f"Channel-{i+1}" for i in range(19)]
    channels = ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz']
    bands = ["Delta", "Theta", "Alpha", "Beta"]
    
    data = []
    for subject in subjects:
        for channel in channels:
            for band, val in freq_bands.items():
                value = np.random.rand()  # Example random value, replace with actual data if needed
                data.append([subject, channel, band, []])
    
    df = pd.DataFrame(data, columns=["Subject", "Channel", "Band", "Window Avg"])

    #the .setIndex is creating a multiIndex DataFrame
    return df.set_index(["Subject", "Channel", "Band"])

# Example usage:
# df = generate_subject_dataframe(5)  # Generates a DataFrame for 5 subjects
# print(df)

In [142]:
def derivativesPath(sub): #dericatives is the preprocessed data
    return f"ds004504/{sub}/eeg/{sub}_task-eyesclosed_eeg.set"



NUM_SUBJECTS = 3
# Use the function inside the groups dictionary
groups = {
    'Alzeimers': generate_subject_dataframe(NUM_SUBJECTS),
    'Control': generate_subject_dataframe(NUM_SUBJECTS),
    'Dementia': generate_subject_dataframe(NUM_SUBJECTS),
}

# Print the DataFrame for 'Alzeimers'

WINDOW_LENGTH = 1.5
STEP_SIZE = .75 #50% sliding window


# ** this is the part where we would do a for loop per group, per subject in that group **

In [209]:
#*this data would be from forloop variables ... for group in groups ... for subject in NUM_SUBJECTS:
group = 'Alzeimers'
index = 0

In [144]:
current_sub = A_sub[index]
raw_sub = mne.io.read_raw_eeglab(derivativesPath(current_sub), preload=True)
sfreq = raw_sub.info['sfreq']

In [145]:

start_times = np.arange(0, raw.times[-1] - WINDOW_LENGTH, STEP_SIZE)
events = np.array([
    [int(t * sfreq), 0, 1] for t in start_times  # MNE event format
])

# Create epochs with a fixed window size
epochs = mne.Epochs(
    raw_sub, events, event_id=1,
    tmin=0, tmax=WINDOW_LENGTH,
    baseline=None, detrend=1, preload=True
)

Not setting metadata
798 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 798 events and 751 original time points ...
0 bad epochs dropped


In [146]:
# Adding a item to thef 
# groups['Alzeimers'].loc[("sub-001", "Channel-1", "Delta"), "Window Avg"].append(22)
print(groups['Alzeimers'])

                      Window Avg
Subject Channel Band            
sub-001 Fp1     Delta         []
                Theta         []
                Alpha         []
                Beta          []
        Fp2     Delta         []
...                          ...
sub-003 Cz      Beta          []
        Pz      Delta         []
                Theta         []
                Alpha         []
                Beta          []

[228 rows x 1 columns]


In [147]:
epoch_0 = epochs[0]
epoch_0.compute_psd() #just realised I should computer PSD then sperate that by the time window and the fmin/fmax from Specrtum.get_data()
epoch_0.get_data()

    Using multitaper spectrum estimation with 7 DPSS windows


array([[[-2.86620380e-05, -1.97269244e-05, -6.83673269e-06, ...,
         -9.50994150e-05, -9.24631295e-05, -9.23170784e-05],
        [-4.68908077e-05, -4.28435982e-05, -4.09936543e-05, ...,
         -8.99882684e-05, -8.37437933e-05, -8.37981463e-05],
        [-2.30665682e-05, -1.58160014e-05, -2.16025441e-05, ...,
         -1.62441899e-05, -1.21674513e-05, -5.99110328e-06],
        ...,
        [-4.05761815e-05, -3.60218472e-05, -3.59108722e-05, ...,
         -5.12548730e-06, -2.96373106e-06, -1.04611544e-06],
        [-4.08194824e-05, -3.69885051e-05, -3.40852631e-05, ...,
          8.44295782e-06,  1.12485455e-05,  1.58607728e-05],
        [-4.88376176e-05, -4.49578770e-05, -4.23964959e-05, ...,
          1.49270643e-05,  1.76837656e-05,  2.37119360e-05]]])

In [207]:
epoch_data = epochs.get_data()
channels_avg = epoch_data.mean(axis=-1)
print(channels_avg)

[[-1.15494239e-20 -5.54372349e-20 -2.45425259e-20 ... -3.58032142e-20
   5.77471197e-21  1.61691935e-20]
 [ 2.07889631e-20 -9.96137815e-21  5.48597637e-21 ...  1.44367799e-21
  -4.61976958e-21 -2.05002275e-20]
 [ 1.96340207e-20  5.54372349e-20 -1.73241359e-20 ... -7.50712556e-21
  -1.03944815e-20 -5.42822925e-20]
 ...
 [ 3.58032142e-20  3.40708006e-20  4.04229838e-20 ...  9.23953915e-21
   1.03944815e-20  4.73526382e-20]
 [-1.73241359e-21  1.61691935e-20  3.58032142e-20 ...  2.42537903e-20
   4.50427534e-20  1.50142511e-20]
 [ 3.37820650e-20 -5.19724077e-21  4.61976958e-20 ...  5.77471197e-21
  -3.63806854e-20  3.98455126e-20]]


In [149]:
#example of mutating desicred thingy
# groups['Alzeimers'].loc[("sub-001", "Channel-1", "Delta"), "Window Avg"].append(22)
# print(groups['Alzeimers'].loc[("sub-001", "Channel-1", "Delta")])
# groups['Alzeimers'].loc[("sub-001", "Channel-1", "Delta"), "Window Avg"].pop()

In [150]:
print(epochs[0])
print(len(epochs))

<Epochs | 1 events (all good), 0 – 1.5 s (baseline off), ~143 kB, data loaded,
 '1': 1>
798


In [151]:
# KeyError: ('sub-001', 'Fp1', 'Delta')
print(groups['Control'])


                      Window Avg
Subject Channel Band            
sub-001 Fp1     Delta         []
                Theta         []
                Alpha         []
                Beta          []
        Fp2     Delta         []
...                          ...
sub-003 Cz      Beta          []
        Pz      Delta         []
                Theta         []
                Alpha         []
                Beta          []

[228 rows x 1 columns]


In [219]:
#to clear out the data in windows
for group in groups.keys():
    groups[group]["Window Avg"] = groups[group]["Window Avg"].apply(lambda x: [])


In [238]:
#for epoch in epochs:
import time
start = time.time()
group = 'Alzeimers'
for x in range(len(epochs)):
    epoch = epochs[x]
    epoch_psd = epoch.compute_psd(fmin=freq_bands["Delta"][0], fmax=freq_bands["Beta"][1], verbose=False) #computing the max PSD between the wave lengths
    for channel_name in channel_names:
                for band, (band_low, band_high) in freq_bands.items():   
                    # groups[group].loc[(current_sub, channel_name, band), "Window Avg"].append(21)
                    # print(group)
                    channel_at_freq = epoch_psd.get_data(picks=channel_name , fmin=band_low, fmax=band_high)
                    channel_at_freq_avg = channel_at_freq.mean(axis=-1)[0][0] # am I computing the average correctly ? 
                    #print(channel_at_freq_avg)
                    #print(channel_at_freq.mean(axis=-1))
                    #print(channel_at_freq.mean(axis=-1)[0][0])
                    groups[group].loc[(current_sub, channel_name, band), "Window Avg"].append(channel_at_freq_avg)
                    #groups[group].loc[(current_sub, channel_name, band), "Window Avg"].append(channel_at_freq_avg)
                        #groups[group][current_sub][bandname][channel].append(channels_avg['channel']) # i think the stuff after teh .append is wrongly formatted 


print(time.time() - start)

11.659667015075684


In [237]:
print("Alzeimers preview")
print(groups['Alzeimers'].loc["sub-001"])
print("Dementia preview")
print(groups['Dementia'].loc["sub-001"])
print("Control preview")
print(groups['Control'].loc["sub-001"])

Alzeimers preview
                                                      Window Avg
Channel Band                                                    
Fp1     Delta  [22, 6.608459244053082e-07, 4.5940200778354104...
        Theta  [1.3203320724332953e-08, 1.1858158244737662e-0...
        Alpha  [2.9003921037250706e-09, 3.4611214363582827e-0...
        Beta   [4.348326037721751e-10, 4.3949036611322243e-10...
Fp2     Delta  [7.673272292107713e-07, 4.0872890122911074e-08...
...                                                          ...
Cz      Beta   [2.404118099848597e-10, 2.4165411116677754e-10...
Pz      Delta  [4.9779476973346886e-08, 3.405565062138606e-08...
        Theta  [6.68025039579135e-09, 5.082621867600136e-09, ...
        Alpha  [1.9780196757133883e-09, 1.6280563582418197e-0...
        Beta   [2.5787183930951195e-10, 2.2537497445582267e-1...

[76 rows x 1 columns]
Dementia preview
                                                      Window Avg
Channel Band                    

In [192]:
print(groups['Alzeimers'].loc["sub-001"])

              Window Avg
Channel Band            
Fp1     Delta         []
        Theta         []
        Alpha         []
        Beta          []
Fp2     Delta         []
...                  ...
Cz      Beta          []
Pz      Delta         []
        Theta         []
        Alpha         []
        Beta          []

[76 rows x 1 columns]


In [170]:
print(groups['Alzeimers'].loc["sub-001"])

                                                      Window Avg
Channel Band                                                    
Fp1     Delta  [[[6.608459244053082e-07]], [[4.59402007783541...
        Theta  [[[1.3203320724332953e-08]], [[1.1858158244737...
        Alpha  [[[2.9003921037250706e-09]], [[3.4611214363582...
        Beta   [[[4.348326037721751e-10]], [[4.39490366113222...
Fp2     Delta  [[[7.673272292107713e-07]], [[4.08728901229110...
...                                                          ...
Cz      Beta   [[[2.404118099848597e-10]], [[2.41654111166777...
Pz      Delta  [[[4.9779476973346886e-08]], [[3.4055650621386...
        Theta  [[[6.68025039579135e-09]], [[5.082621867600136...
        Alpha  [[[1.9780196757133883e-09]], [[1.6280563582418...
        Beta   [[[2.5787183930951195e-10]], [[2.2537497445582...

[76 rows x 1 columns]


In [None]:
# Previous attempts below

In [None]:
for bandname, (low, high) in freq_bands:
    for epoch in epochs.get_data(fmin=low, fmax=high):
        channels_avg = epoch_data.mean(axis=-1) #all the channels average at that frequency band
        # channels_avg = an_epoch.mean(axis=-1) #all the channels average at that frequency band
        for channel in channel_names:
            groups[group][current_sub][bandname][channel].append(channels_avg['channel']) # i think the stuff after teh .append is wrongly formatted 

In [136]:
for bandname, (low, high) in freq_bands:
    # for epoch in epochs:
    for x in range(len(epochs)):
        epoch = epochs[x]
        epoch.computer_psd(fmin=freq_bands["Delta"][0], fmax=freq_bands["Beta"][1]) #computing the max PSD between the wave lengths
        #channels_avg = epoch_data.mean(axis=-1) #all the channels average at that frequency band
        # channels_avg = an_epoch.mean(axis=-1) #all the channels average at that frequency band
        for channel in channel_names:
            for 
            groups[group][current_sub][bandname][channel].append(channels_avg['channel']) # i think the stuff after teh .append is wrongly formatted 
        
            
            # for channelGroup in groups['Alzeimers']
        #     groups[channelGroup] #append the channels avg accrouss the 19 

# channels_avg = epochs_A_0.get_data(fmin=0.1, fmax=20).mean(axis=-1)

SyntaxError: invalid syntax (2909161960.py, line 9)

In [None]:
#replicating above ^ doing it differently
for bandname, (low, high) in freq_bands:

    
    
    #for epoch_data in epochs.get_data(fmin=low, fmax=high):
    for epoch in epochs:
        '''
        an_epochs_psd = epochs[0].compute_psd(method='multitaper', fmin=0.1, fmax=20.0)
an_epochs_psd.plot() # this plots all channels *tested works*
an_epochs_psd.plot(picks=channel_names[0]) # this plots a channel *tested works*
**do this stuff here **
        '''
        e
        channels_avg = an_epoch.mean(axis=-1) #all the channels average at that frequency band
        for channelGroup in groups['Alzeimers']
            groups['Alzeimers'] #append the channels avg accrouss the 19 spots respectivly 

# channels_avg = epochs_A_0.get_data(fmin=0.1, fmax=20).mean(axis=-1)