In [None]:
# This cell was used to debug the dataset: some .set headers did not match their .fdt files, which held fewer or more samples than the header declared.
from scipy.io import loadmat, savemat
import os

base_path = "Sleep_dep_dataset"

# Bytes per stored sample in a .fdt file. The size check below assumes
# single-precision (float32) samples, which is how EEGLAB writes .fdt data.
BYTES_PER_SAMPLE = 4


def reconcile_set_with_fdt(set_file_path, fdt_file_path, label):
    """Check a .set header against the size of its .fdt file and repair the header.

    The number of samples per channel declared in the .set header (``pnts``)
    is compared with the number implied by the .fdt file size. On a mismatch
    the header is rewritten in place so that ``pnts`` matches the .fdt file.

    Parameters
    ----------
    set_file_path : str
        Path of the EEGLAB .set file (read, and overwritten when fixed).
    fdt_file_path : str
        Path of the companion .fdt file (only its size is read).
    label : str
        Text identifying the recording in the printed messages.

    Returns
    -------
    str
        ``"missing_set"``, ``"missing_fdt"``, ``"unreadable"``, ``"match"``,
        ``"fixed"`` or ``"save_failed"``.

    Notes
    -----
    Assumes continuous data (a single trial); epoched files would need the
    trial count in the size computation -- TODO confirm for this dataset.
    """
    if not os.path.exists(set_file_path):
        print(f"Missing .set file: {set_file_path}")
        return "missing_set"
    if not os.path.exists(fdt_file_path):
        print(f"Missing .fdt file: {fdt_file_path}")
        return "missing_fdt"

    try:
        eeg_data = loadmat(set_file_path)['EEG']
        header = eeg_data[0, 0]
        # loadmat can return narrow NumPy integer scalars (e.g. uint8 for a
        # small channel count); convert to Python ints so the byte arithmetic
        # below cannot overflow.
        nbchan = int(header['nbchan'][0, 0])
        pnts_set = int(header['pnts'][0, 0])
        srate = header['srate'][0, 0]
        if nbchan <= 0:
            # Would otherwise surface as an uncaught ZeroDivisionError below.
            raise ValueError(f"invalid channel count {nbchan}")
    except Exception as e:
        print(f"Error reading .set file: {set_file_path}, Error: {e}")
        return "unreadable"

    frame_bytes = nbchan * BYTES_PER_SAMPLE  # one sample across all channels
    expected_size = pnts_set * frame_bytes
    actual_size = os.path.getsize(fdt_file_path)
    pnts_actual, leftover = divmod(actual_size, frame_bytes)
    if leftover:
        # The original floor division hid this case completely.
        print(f"Warning: {fdt_file_path} ends with {leftover} byte(s) that do not form a complete sample")

    if pnts_set == pnts_actual:
        print(f"Patient {label}: Metadata matches the .fdt file.")
        return "match"

    print(f"Mismatch for {label}:")
    print(f"  - .set file points: {pnts_set}")
    print(f"  - .fdt file points: {pnts_actual}")
    print(f"  - expected .fdt size: {expected_size} bytes, actual: {actual_size} bytes")
    print("channels", nbchan)
    print("sample rate", srate)
    # Fix the .set file metadata
    print(f"  -> Fixing .set file to match .fdt file points ({pnts_actual})")
    try:
        # Store the corrected count as float64 instead of writing into the
        # loaded array, whose integer dtype may be too narrow for the new value.
        widened = eeg_data[0, 0]['pnts'].astype('float64')
        widened[0, 0] = pnts_actual
        eeg_data['pnts'][0, 0] = widened
        # appendmat=False: the target already has its final ".set" name and
        # must never be turned into "<name>.set.mat".
        savemat(set_file_path, {'EEG': eeg_data}, appendmat=False)
    except Exception as e:
        print(f"  -> Error saving fixed .set file: {e}")
        return "save_failed"
    print(f"  -> Fixed metadata in: {set_file_path}")
    return "fixed"


for i in range(1, 72):
    patient_id = f"sub-{i:02d}"
    for session in ['1', '2']:
        eeg_dir = os.path.join(base_path, patient_id, f"ses-{session}", "eeg")
        file_stem = f"{patient_id}_ses-{session}_task-eyesopen_eeg"
        set_file_path = os.path.join(eeg_dir, file_stem + ".set")
        fdt_file_path = os.path.join(eeg_dir, file_stem + ".fdt")
        reconcile_set_with_fdt(set_file_path, fdt_file_path, f"{patient_id}, Session {session}")


In [None]:
import mne

# Spot-check a single recording: load it, report its sampling rate and the
# shape of its data array, then open MNE's signal browser.
eeg_path = "Sleep_dep_dataset/sub-43/ses-2/eeg/new/sub-43_ses-2_task-eyesopen_eeg.set"
raw = mne.io.read_raw_eeglab(eeg_path, preload=True)

sampling_rate = raw.info['sfreq']
print(f"Sampling rate: {sampling_rate} Hz")

data_shape = raw.get_data().shape
print(f"Data shape: {data_shape}")

raw.plot()


In [None]:
#DOWNSAMPLE THE DATA THAT WAS 5000HZ
import mne
import os

# Recordings that were sampled at 5000 Hz and must be brought down to the
# rate used by the rest of the dataset.
datasets = [
    "Sleep_dep_dataset/sub-39/ses-2/eeg/sub-39_ses-2_task-eyesopen_eeg.set",
    "Sleep_dep_dataset/sub-44/ses-2/eeg/sub-44_ses-2_task-eyesopen_eeg.set",
    "Sleep_dep_dataset/sub-43/ses-2/eeg/sub-43_ses-2_task-eyesopen_eeg.set"
]

# Downsample to 500 Hz
new_rate = 500

for file_path in datasets:
    print(f"Processing: {file_path}")

    raw = mne.io.read_raw_eeglab(file_path, preload=True)
    original_rate = raw.info['sfreq']
    print(f"Original sampling rate: {original_rate} Hz")

    raw.resample(new_rate)
    print(f"Downsampled to: {raw.info['sfreq']} Hz")

    # Swap only the extension; str.replace would also rewrite any ".set"
    # that happens to occur earlier in the path.
    fdt_file_path = os.path.splitext(file_path)[0] + ".fdt"

    # Move the originals aside instead of deleting them up front, so a failed
    # export cannot leave the recording missing from disk.
    backups = []
    for original_path in (file_path, fdt_file_path):
        if os.path.exists(original_path):
            backup_path = original_path + ".bak"
            os.replace(original_path, backup_path)
            backups.append((original_path, backup_path))

    try:
        raw.export(file_path, fmt='eeglab')
    except Exception:
        # Put the untouched originals back (overwriting any partial output)
        # and let the error propagate.
        for original_path, backup_path in backups:
            os.replace(backup_path, original_path)
        print(f"Export failed; restored original files for: {file_path}")
        raise

    # Export succeeded: the old .set/.fdt pair is no longer needed.
    for original_path, backup_path in backups:
        os.remove(backup_path)
        print(f"Deleted: {original_path}")

    print(f"Saved downsampled file: {file_path}")


In [None]:
#calculation for power spectrum
import numpy as np
import matplotlib.pyplot as plt
import mne
import os

# Per-recording mean spectra, one entry per subject/session, split by condition.
psd_normal = []
psd_sd = []

fmin, fmax = 2, 40  # Hz

# Frequency axis shared by every spectrum. Reset here so a re-run never picks
# up a stale axis left over from a previous execution of the notebook.
freqs = None

for i in range(1, 72):
    patient_id = f"sub-{i:02d}"
    for session, condition in zip(['1', '2'], ['normal sleep', 'sleep deprivation']):
        eeg_path = os.path.join("Sleep_dep_dataset", patient_id, "ses-" + session, "eeg", patient_id + "_ses-" + session + "_task-eyesopen_eeg.set")

        if not os.path.exists(eeg_path):
            continue

        raw = mne.io.read_raw_eeglab(eeg_path, preload=True)

        psd = raw.compute_psd(method="welch", fmin=fmin, fmax=fmax, n_fft=2048)
        psd_data = psd.get_data()

        # With a fixed n_fft the frequency bins depend on the sampling rate, so
        # a recording at a different rate yields a different grid and cannot be
        # averaged bin-by-bin with the others.
        if freqs is None:
            freqs = psd.freqs
        elif freqs.shape != psd.freqs.shape or not np.allclose(freqs, psd.freqs):
            print(f"Skipping {eeg_path}: PSD frequency grid differs from the other recordings (sampling rate {raw.info['sfreq']} Hz)")
            continue

        # Average over the first axis (channels, per MNE's spectrum layout)
        # to get one spectrum per recording.
        mean_psd = np.mean(psd_data, axis=0)

        if condition == 'normal sleep':
            psd_normal.append(mean_psd)
        else:
            psd_sd.append(mean_psd)

psd_normal = np.array(psd_normal)
psd_sd = np.array(psd_sd)

# Group mean and standard error of the mean across recordings. ddof=1 gives
# the sample standard deviation, which is what the SEM is defined on.
mean_normal = np.mean(psd_normal, axis=0)
sem_normal = np.std(psd_normal, axis=0, ddof=1) / np.sqrt(psd_normal.shape[0])

mean_sd = np.mean(psd_sd, axis=0)
sem_sd = np.std(psd_sd, axis=0, ddof=1) / np.sqrt(psd_sd.shape[0])


In [None]:
# Power spectrum of both conditions: group mean in dB with a shaded
# mean +/- SEM band.
fig, ax = plt.subplots(figsize=(8, 6))

for curve_label, curve_color, group_mean, group_sem in (
    ('Normal Sleep', 'magenta', mean_normal, sem_normal),
    ('Sleep Deprivation', 'green', mean_sd, sem_sd),
):
    lower_db = 10 * np.log10(group_mean - group_sem)
    upper_db = 10 * np.log10(group_mean + group_sem)
    ax.plot(freqs, 10 * np.log10(group_mean), label=curve_label, color=curve_color)
    ax.fill_between(freqs, lower_db, upper_db, color=curve_color, alpha=0.3)

ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Absolute Power (dB)')
ax.set_title('Comparison of Power Spectrum')
ax.legend(loc='upper right')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
#PREPROCESSING, BANDPASS FILTER, ICA, REMOVE BAD CHANNELS (electrodes), THRESHOLD FOR EPOCHS 
import numpy as np
import matplotlib.pyplot as plt
import mne
import os
import csv

# Per-recording mean spectra after preprocessing, split by condition.
psd_normal = []
psd_sd = []


fmin, fmax = 2, 40  # Hz

# Bandpass filter
filter_low = 0.2
filter_high = 45

epoch_duration = 4.0  # seconds
total_epochs_normal = 0
total_epochs_SD = 0
total_rejected_epochs_normal= 0
total_rejected_epochs_SD = 0
datasets_with_all_epochs_rejected = 0
# One row per recording that kept at least one epoch (written to CSV below).
epoch_summary = []

output_dir = "preprocessed_epochs"
os.makedirs(output_dir, exist_ok=True)

for i in range(1, 72):
    patient_id = f"sub-{i:02d}"
    for session, condition in zip(['1', '2'], ['normal sleep', 'sleep deprivation']):
        eeg_path = os.path.join("Sleep_dep_dataset", patient_id, "ses-" + session, "eeg", patient_id + "_ses-" + session + "_task-eyesopen_eeg.set")

        if not os.path.exists(eeg_path):
            continue

        raw = mne.io.read_raw_eeglab(eeg_path, preload=True)

        # 10-20 international montage
        montage = mne.channels.make_standard_montage('standard_1020')
        raw.set_montage(montage)

        # bandpass filter
        raw.filter(l_freq=filter_low, h_freq=filter_high, fir_design='firwin')

        # Detect bad channels based on signal deviations: flag channels whose
        # standard deviation is an outlier (|z| > 3) relative to the other
        # channels. The previous version z-scored the channel *means*, which
        # carry essentially no information once the signal has been high-pass
        # filtered, and left the computed standard deviations unused.
        data = raw.get_data()
        channel_stds = np.std(data, axis=1)
        std_spread = np.std(channel_stds)
        if std_spread > 0:
            z_scores = (channel_stds - np.mean(channel_stds)) / std_spread
        else:
            # All channels equally variable: nothing can be an outlier, and
            # dividing by zero would only produce NaNs.
            z_scores = np.zeros_like(channel_stds)
        bads = [ch_name for ch_name, z in zip(raw.info['ch_names'], z_scores) if abs(z) > 3]

        raw.info['bads'] = bads

        # Interpolate bad channels
        if bads:
            raw.interpolate_bads(reset_bads=True)

        # ICA
        ica = mne.preprocessing.ICA(n_components=20, random_state=97, max_iter=800)
        ica.fit(raw)

        # Fp1 and Fp2 are the electrodes nearest the eyes; use them as the
        # reference for finding ocular components.
        eog_indices, eog_scores = ica.find_bads_eog(raw, ch_name=['Fp1', 'Fp2'])
        ica.exclude.extend(eog_indices)

        raw = ica.apply(raw)

        epochs = mne.make_fixed_length_epochs(raw, duration=epoch_duration, preload=True)
        num_epochs = len(epochs)

        # reject epochs based on threshold, cite paper
        reject_criteria = dict(eeg=200e-6 if condition == 'normal sleep' else 220e-6)
        epochs = epochs.drop_bad(reject=reject_criteria)
        num_rejected_epochs = num_epochs - len(epochs)

        if condition == 'normal sleep':
            total_rejected_epochs_normal += num_rejected_epochs
            total_epochs_normal += num_epochs
        elif condition == 'sleep deprivation':
            total_rejected_epochs_SD += num_rejected_epochs
            total_epochs_SD += num_epochs

        if len(epochs) == 0:
            # Nothing left to save or analyse for this recording.
            datasets_with_all_epochs_rejected += 1
            continue

        output_file = os.path.join(output_dir, f"{patient_id}_ses-{session}_retained-epochs.fif")
        epochs.save(output_file, overwrite=True)

        epoch_summary.append({
            "Patient ID": patient_id,
            "Session": session,
            "Condition": condition,
            "Total Epochs": num_epochs,
            "Rejected Epochs": num_rejected_epochs,
            "Retained Epochs": len(epochs)
        })

        # n_fft=2000 equals one 4 s epoch at 500 Hz -- assumes every recording
        # is sampled at 500 Hz at this point.
        psd = epochs.compute_psd(method="welch", fmin=fmin, fmax=fmax, n_fft=2000)
        psd_data = psd.get_data()
        freqs = psd.freqs
        # Collapse the first two axes (epochs and channels, per MNE's epochs
        # spectrum layout) into one spectrum per recording.
        psd_mean = np.mean(psd_data, axis=(0, 1))

        if condition == 'normal sleep':
            psd_normal.append(psd_mean)
        else:
            psd_sd.append(psd_mean)

psd_normal = np.array(psd_normal)
psd_sd = np.array(psd_sd)

# Group mean and standard error of the mean across recordings. ddof=1 gives
# the sample standard deviation, which is what the SEM is defined on.
mean_normal = np.mean(psd_normal, axis=0)
sem_normal = np.std(psd_normal, axis=0, ddof=1) / np.sqrt(psd_normal.shape[0])

mean_sd = np.mean(psd_sd, axis=0)
sem_sd = np.std(psd_sd, axis=0, ddof=1) / np.sqrt(psd_sd.shape[0])

output_file = "epoch_rejection_summary.csv"
with open(output_file, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["Patient ID", "Session", "Condition", "Total Epochs", "Rejected Epochs", "Retained Epochs"])
    writer.writeheader()
    writer.writerows(epoch_summary)


In [None]:
# Diagnostics for the most recently processed recording only: `epochs` and
# `ica` are whatever the preprocessing loop left behind on its last iteration.
epochs.plot_drop_log()
ica.plot_components()


def _percent(part, whole):
    """Format ``part / whole`` as a percentage, or 'n/a' when ``whole`` is 0."""
    return f"{100 * part / whole:.2f}%" if whole else "n/a"


# Count what was actually processed: recordings that kept at least one epoch
# (one summary row each) plus those that lost every epoch. Missing files are
# skipped by the preprocessing loop, so a fixed 2 * 71 can overstate this.
datasets_processed = len(epoch_summary) + datasets_with_all_epochs_rejected

print("\nSummary of Epoch Rejection:")
print(f"Total Datasets Processed: {datasets_processed}")
print(f"Datasets with All Epochs Rejected: {datasets_with_all_epochs_rejected}")
print(f"Total SD Rejected Epochs: {total_rejected_epochs_SD}")
print(f"Total SD Epochs: {total_epochs_SD}")
print(f"Total Normal Epochs: {total_epochs_normal}")
print(f"Total Normal Rejected Epochs: {total_rejected_epochs_normal}")
print(f"Percentage of SD Rejected Epochs: {_percent(total_rejected_epochs_SD, total_epochs_SD)}")
print(f"Percentage of Normal Rejected Epochs: {_percent(total_rejected_epochs_normal, total_epochs_normal)}")


In [None]:
# Power spectrum of both conditions after preprocessing: group mean in dB
# with a shaded mean +/- SEM band.
fig, ax = plt.subplots(figsize=(8, 6))

for curve_label, curve_color, group_mean, group_sem in (
    ('Normal Sleep', 'magenta', mean_normal, sem_normal),
    ('Sleep Deprivation', 'green', mean_sd, sem_sd),
):
    lower_db = 10 * np.log10(group_mean - group_sem)
    upper_db = 10 * np.log10(group_mean + group_sem)
    ax.plot(freqs, 10 * np.log10(group_mean), label=curve_label, color=curve_color)
    ax.fill_between(freqs, lower_db, upper_db, color=curve_color, alpha=0.3)

ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Absolute Power (dB)')
ax.set_title('Comparison of Power Spectrum')
ax.legend(loc='upper right')
ax.grid(True)
fig.tight_layout()
plt.show()