In [1]:
import os
import mne
import numpy as np
import pandas as pd
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Sampling rate in Hz (fs) used for the processed data.
SAMPLE_RATE = 128
# SAMPLE_LEN = 1.0   # sample seconds
# OVERLAPPING = 0.8  # overlapping seconds
# Name of the output sub-folder, built from the sampling rate (e.g. '128Hz').
sub_folder_path = '{}Hz'.format(SAMPLE_RATE)
sub_folder_path

'128Hz'

In [3]:
# Dataset root directory.
root = 'ADFTD-PS/'
# participants.tsv sits directly under the root and is tab-separated.
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, delimiter='\t')
participants

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,F,57,A,16
1,sub-002,F,78,A,22
2,sub-003,M,70,A,14
3,sub-004,F,67,A,20
4,sub-005,M,70,A,22
...,...,...,...,...,...
83,sub-084,F,71,F,24
84,sub-085,M,64,F,26
85,sub-086,M,49,F,26
86,sub-087,M,73,F,24


## Labels

In [15]:
# Uninitialised (n_participants, 2) int32 array; every row is written by the
# label-filling loop that follows.
labels = np.empty((len(participants), 2), dtype=np.int32)
labels.shape

(88, 2)

In [16]:
# Integer class label for each group code found in the 'Group' column of
# participants.tsv. An unknown code raises KeyError instead of being guessed.
label_map = {'A':1, 'F':2, 'C':0}
# Fill `labels`: column 0 = class label, column 1 = numeric subject id.
# Columns are read by name rather than by position so that a reordered or
# extended participants.tsv cannot silently shift the values being read.
for i, (participant_id, group) in enumerate(
        zip(participants['participant_id'], participants['Group'])):
    # 'sub-001' -> 1; taking everything after the dash also copes with ids
    # that have more (or fewer) than three digits.
    pid = int(participant_id.split('-')[-1])
    label = label_map[group]
    labels[i,0] = label
    labels[i,1] = pid

In [17]:
# Labels are written to Processed/<rate>Hz/ADFTD-PS/Label/label.npy
label_path = 'Processed/' + sub_folder_path + '/ADFTD-PS/Label'
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(label_path, exist_ok=True)
np.save(label_path + '/label.npy', labels)

In [18]:
# Read the label array back from disk to confirm what was written.
saved_label_file = label_path + '/label.npy'
np.load(saved_label_file)

array([[ 1,  1],
       [ 1,  2],
       [ 1,  3],
       [ 1,  4],
       [ 1,  5],
       [ 1,  6],
       [ 1,  7],
       [ 1,  8],
       [ 1,  9],
       [ 1, 10],
       [ 1, 11],
       [ 1, 12],
       [ 1, 13],
       [ 1, 14],
       [ 1, 15],
       [ 1, 16],
       [ 1, 17],
       [ 1, 18],
       [ 1, 19],
       [ 1, 20],
       [ 1, 21],
       [ 1, 22],
       [ 1, 23],
       [ 1, 24],
       [ 1, 25],
       [ 1, 26],
       [ 1, 27],
       [ 1, 28],
       [ 1, 29],
       [ 1, 30],
       [ 1, 31],
       [ 1, 32],
       [ 1, 33],
       [ 1, 34],
       [ 1, 35],
       [ 1, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39],
       [ 0, 40],
       [ 0, 41],
       [ 0, 42],
       [ 0, 43],
       [ 0, 44],
       [ 0, 45],
       [ 0, 46],
       [ 0, 47],
       [ 0, 48],
       [ 0, 49],
       [ 0, 50],
       [ 0, 51],
       [ 0, 52],
       [ 0, 53],
       [ 0, 54],
       [ 0, 55],
       [ 0, 56],
       [ 0, 57],
       [ 0, 58],
       [ 0, 59

## Features

In [8]:
# Location of the EEGLAB derivatives folder underneath the dataset root.
derivatives_root = os.path.join(root, "derivatives/eeglab/")
derivatives_root

'ADFTD-PS/derivatives/eeglab/'

In [9]:
# Survey every recording: gather the bad-channel list, the sampling frequency
# and the data shape of each .set file so they can be summarised afterwards.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
for subject_dir in os.listdir(derivatives_root):
    # Only subject folders ('sub-...') are of interest.
    if 'sub-' not in subject_dir:
        continue
    sub_path = os.path.join(derivatives_root, subject_dir, 'eeg/')
    for fname in os.listdir(sub_path):
        # Skip everything that is not an EEGLAB .set file.
        if '.set' not in fname:
            continue
        file_path = os.path.join(sub_path, fname)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # Channels flagged as bad in the recording's info.
        bad_channel_list.append(raw.info['bads'])
        # Sampling frequency reported by the recording.
        sampling_freq_list.append(raw.info['sfreq'])
        # Shape of the signal array returned by MNE.
        data = raw.get_data()
        data_shape_list.append(data.shape)

In [10]:
from collections import Counter

# Tally how often each channel count (first entry of every data shape) and
# each sampling frequency occurs across the surveyed recordings.
channel_counts = Counter(shape[0] for shape in data_shape_list)
rate_counts = Counter(sampling_freq_list)

print(bad_channel_list)
print(data_shape_list[0])
print("Channel number counter:", channel_counts)
print("Sampling rate counter:", rate_counts)

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
(19, 151675)
Channel number counter: Counter({19: 88})
Sampling rate counter: Counter({500.0: 88})


In [11]:
# resample the time series data from original_fs to target_fs
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel time series from ``original_fs`` to ``target_fs``.

    All channels are resampled in a single call to ``scipy.signal.resample``
    (Fourier method) along the time axis instead of looping over channels in
    Python.

    Parameters
    ----------
    data : array-like, shape (T, C)
        Time series with time along axis 0 and channels along axis 1.
    original_fs : float
        Sampling frequency of ``data``; must be positive.
    target_fs : float
        Desired sampling frequency; must be positive.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(int(T * target_fs / original_fs), C)``.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, if either rate is not positive, or if the
        input is too short to yield at least one output sample.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-D with shape (T, C), got shape {}".format(data.shape))
    if original_fs <= 0 or target_fs <= 0:
        raise ValueError("original_fs and target_fs must both be positive")

    T, C = data.shape
    # Truncate (not round) so the output never covers more time than the input.
    new_length = int(T * target_fs / original_fs)
    if new_length < 1:
        raise ValueError(
            "input with {} samples is too short to resample from {} to {}".format(
                T, original_fs, target_fs))

    # axis=0 resamples every channel (column) at once; the result is cast to
    # float64 to match the dtype of the buffer the loop-based version filled.
    return np.asarray(resample(data, new_length, axis=0), dtype=np.float64)

In [12]:
# Resample every subject's recording to SAMPLE_RATE and save it as
# Processed/<rate>Hz/ADFTD-PS/Feature/feature_<id>.npy
feature_path = 'Processed/' + sub_folder_path + '/ADFTD-PS/Feature'
os.makedirs(feature_path, exist_ok=True)

# os.listdir() returns entries in arbitrary order, so iterate sorted for a
# deterministic run.
for sub in sorted(os.listdir(derivatives_root)):
    if 'sub-' in sub:
        # Take the id from the folder name ('sub-001' -> 1) rather than from a
        # running counter: a counter silently pairs feature files with the
        # wrong subject ids in label.npy whenever the listing is not in
        # subject order or a subject folder is missing.
        sub_id = int(sub.split('-')[-1])
        sub_path = os.path.join(derivatives_root, sub, 'eeg/')
        print(sub_path)
        for file in sorted(os.listdir(sub_path)):
            if '.set' in file:
                file_path = os.path.join(sub_path, file)
                raw = mne.io.read_raw_eeglab(file_path, preload=True)
                sampling_freq = raw.info['sfreq']
                # Transpose so that time runs along axis 0, as
                # resample_time_series expects.
                data = raw.get_data().T
                print("Raw data shape ", data.shape)
                data = resample_time_series(data, sampling_freq, SAMPLE_RATE)
                print("Downsampling and segmented data shape ", data.shape)
                # NOTE(review): if a subject folder ever holds more than one
                # .set file, later files overwrite earlier ones here.
                np.save(feature_path + '/feature_{:02d}.npy'.format(sub_id), data)
    print("-------------------------------------\n")

-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-001\eeg/
Raw data shape  (151675, 19)
Downsampling and segmented data shape  (38828, 19)
-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-002\eeg/
Raw data shape  (193505, 19)
Downsampling and segmented data shape  (49537, 19)
-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-003\eeg/
Raw data shape  (137093, 19)
Downsampling and segmented data shape  (35095, 19)
-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-004\eeg/
Raw data shape  (167997, 19)
Downsampling and segmented data shape  (43007, 19)
-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-005\eeg/
Raw data shape  (146404, 19)
Downsampling and segmented data shape  (37479, 19)
-------------------------------------

ADFTD-PS/derivatives/eeglab/sub-006\eeg/
Raw data shape  (194556, 19)
Downsampling and segmented data shape  (49806, 19)
-------------------------------------

A

In [14]:
# Sanity-check the saved feature files: print the shape of each array and the
# total number of time samples across all of them.

path = feature_path

total_length = 0
# sorted(): os.listdir() order is arbitrary, and sorting keeps the printed
# shapes in file-name order.
for file in sorted(os.listdir(path)):
    # Ignore anything that is not a saved array (e.g. stray OS/editor files).
    if not file.endswith('.npy'):
        continue
    sub_path = os.path.join(path, file)
    # Load each file once and reuse its shape instead of reading it twice.
    shape = np.load(sub_path).shape
    print(shape)
    total_length += shape[0]
print("\nTotal length:", total_length)

(38828, 19)
(49537, 19)
(35095, 19)
(43007, 19)
(37479, 19)
(49806, 19)
(54619, 19)
(48850, 19)
(52474, 19)
(38037, 19)
(37375, 19)
(43014, 19)
(58933, 19)
(63609, 19)
(36477, 19)
(54107, 19)
(53281, 19)
(46637, 19)
(39745, 19)
(49198, 19)
(19032, 19)
(32882, 19)
(9327, 19)
(9327, 19)
(33130, 19)
(10893, 19)
(12059, 19)
(48305, 19)
(12103, 19)
(12019, 19)
(9959, 19)
(9256, 19)
(12711, 19)
(9879, 19)
(53682, 19)
(10227, 19)
(70361, 19)
(45007, 19)
(47951, 19)
(22301, 19)
(43622, 19)
(26547, 19)
(38600, 19)
(62685, 19)
(49131, 19)
(67650, 19)
(54856, 19)
(47258, 19)
(54720, 19)
(40024, 19)
(57005, 19)
(54353, 19)
(41879, 19)
(56022, 19)
(46871, 19)
(54438, 19)
(43847, 19)
(29651, 19)
(36836, 19)
(54997, 19)
(49575, 19)
(46682, 19)
(52068, 19)
(51472, 19)
(24208, 19)
(32329, 19)
(45559, 19)
(28857, 19)
(63247, 19)
(53767, 19)
(62738, 19)
(44432, 19)
(71918, 19)
(39838, 19)
(45824, 19)
(58170, 19)
(35028, 19)
(44968, 19)
(10893, 19)
(9583, 19)
(11413, 19)
(9128, 19)
(8834, 19)
(9029, 19)
(