In [1]:
import os
import mne
import numpy as np
import pandas as pd
from scipy.signal import resample
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, including any emitted while reading or resampling the recordings.
warnings.filterwarnings("ignore")

In [2]:
# Target sampling frequency in Hz.
SAMPLE_RATE = 128  # fs
# Segmentation settings, currently disabled:
# SAMPLE_LEN = 1.0   # sample seconds
# OVERLAPPING = 0.8  # overlapping seconds

# Name of the output sub-folder, built from the sampling rate (e.g. '128Hz').
sub_folder_path = f'{SAMPLE_RATE}Hz'
sub_folder_path

'128Hz'

In [3]:
# Dataset root folder, relative to the current working directory.
root = 'ADFTD-RS/'
# participants.tsv is a tab-separated table with one row per participant.
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, delimiter='\t')
participants

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,F,57,A,16
1,sub-002,F,78,A,22
2,sub-003,M,70,A,14
3,sub-004,F,67,A,20
4,sub-005,M,70,A,22
...,...,...,...,...,...
83,sub-084,F,71,F,24
84,sub-085,M,64,F,26
85,sub-086,M,49,F,26
86,sub-087,M,73,F,24


## Labels

In [4]:
# Label table with one row per participant and two int32 columns.
# np.empty leaves the contents uninitialised; every cell must be written
# before the array is used.
labels = np.empty((len(participants), 2), dtype=np.int32)
labels.shape

(88, 2)

In [5]:
# Maps the single-letter code in the 'Group' column to an integer class index.
label_map = {'A':1, 'F':2, 'C':0}
# Fill the label table row by row:
#   column 0 -> class index, column 1 -> numeric subject ID.
# Columns are addressed by name rather than by position, so reordering or
# adding columns in participants.tsv cannot silently pick the wrong field.
for i, (participant_id, group) in enumerate(
        zip(participants['participant_id'], participants['Group'])):
    # An unknown group code raises KeyError instead of storing a wrong label.
    labels[i, 0] = label_map[group]
    # The last three characters of the ID are the subject number: 'sub-001' -> 1.
    labels[i, 1] = int(participant_id[-3:])

In [6]:
# Destination folder for the label table: Processed/<fs>Hz/ADFTD-RS/Label
label_path = 'Processed/' + sub_folder_path + '/ADFTD-RS/Label'
# exist_ok=True creates the folder tree when it is missing and is a no-op when
# it already exists, avoiding the check-then-create race of
# os.path.exists() followed by os.makedirs().
os.makedirs(label_path, exist_ok=True)
np.save(label_path + '/label.npy', labels)

In [7]:
# Read the saved label file back and display it to check what was written.
np.load(os.path.join(label_path, 'label.npy'))

array([[ 1,  1],
       [ 1,  2],
       [ 1,  3],
       [ 1,  4],
       [ 1,  5],
       [ 1,  6],
       [ 1,  7],
       [ 1,  8],
       [ 1,  9],
       [ 1, 10],
       [ 1, 11],
       [ 1, 12],
       [ 1, 13],
       [ 1, 14],
       [ 1, 15],
       [ 1, 16],
       [ 1, 17],
       [ 1, 18],
       [ 1, 19],
       [ 1, 20],
       [ 1, 21],
       [ 1, 22],
       [ 1, 23],
       [ 1, 24],
       [ 1, 25],
       [ 1, 26],
       [ 1, 27],
       [ 1, 28],
       [ 1, 29],
       [ 1, 30],
       [ 1, 31],
       [ 1, 32],
       [ 1, 33],
       [ 1, 34],
       [ 1, 35],
       [ 1, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39],
       [ 0, 40],
       [ 0, 41],
       [ 0, 42],
       [ 0, 43],
       [ 0, 44],
       [ 0, 45],
       [ 0, 46],
       [ 0, 47],
       [ 0, 48],
       [ 0, 49],
       [ 0, 50],
       [ 0, 51],
       [ 0, 52],
       [ 0, 53],
       [ 0, 54],
       [ 0, 55],
       [ 0, 56],
       [ 0, 57],
       [ 0, 58],
       [ 0, 59

## Features

In [8]:
# Folder under the dataset root that is scanned for per-subject 'sub-*'
# directories; each of those holds an 'eeg/' folder with the .set recordings.
derivatives_root = os.path.join(root, 'derivatives/')
derivatives_root

'ADFTD-RS/derivatives/'

In [9]:
# Sanity check before conversion: for every recording collect the channels
# flagged as bad, the sampling frequency and the shape of the data array.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
# os.listdir() returns entries in arbitrary order, so sort them: position k in
# each list then refers to the same recording on every run and platform.
for sub in sorted(os.listdir(derivatives_root)):
    # Only per-subject folders ('sub-XXX') are of interest.
    if not sub.startswith('sub-'):
        continue
    sub_path = os.path.join(derivatives_root, sub, 'eeg/')
    for file in sorted(os.listdir(sub_path)):
        # Match on the extension so names that merely contain '.set' are skipped.
        if not file.endswith('.set'):
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # channels marked as bad in the recording metadata
        bad_channel_list.append(raw.info['bads'])
        # sampling frequency of the recording
        sampling_freq_list.append(raw.info['sfreq'])
        # shape of the EEG data array
        data_shape_list.append(raw.get_data().shape)

In [10]:
# Print the three per-recording summaries, in this order:
#   1. bad channels  -> 0 bad channels
#   2. sampling rate -> 500 Hz for all runs
#   3. data shape    -> same number of channels & different timestamps
for summary in (bad_channel_list, sampling_freq_list, data_shape_list):
    print(summary)

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0]
[(19, 299900), (19, 396550), (

In [11]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel time series from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : 2-D array of shape (T, C)
        Time runs along axis 0, channels along axis 1.
    original_fs : number
        Sampling frequency of ``data``.
    target_fs : number
        Desired sampling frequency.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C)
        Each column is the corresponding input column passed through
        ``scipy.signal.resample``. The output length is truncated by ``int()``.
    """
    n_samples, n_channels = data.shape
    n_out = int(n_samples * target_fs / original_fs)

    out = np.zeros((n_out, n_channels))
    # Iterating over the transpose yields one channel (column of `data`) at a time.
    for ch, series in enumerate(data.T):
        out[:, ch] = resample(series, n_out)

    return out

In [12]:
# Destination folder for the per-subject feature files:
# Processed/<fs>Hz/ADFTD-RS/Feature
feature_path = 'Processed/' + sub_folder_path + '/ADFTD-RS/Feature'
# Creates the folder tree when missing, no-op otherwise (no check-then-create race).
os.makedirs(feature_path, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sort for a reproducible run.
for sub in sorted(os.listdir(derivatives_root)):
    # Only per-subject folders ('sub-XXX') are processed.
    if not sub.startswith('sub-'):
        continue
    # Take the subject number from the folder name ('sub-001' -> 1) instead of
    # from a running counter, so feature_XX.npy always belongs to subject XX
    # even if the listing order changes or a subject folder is missing.
    sub_id = int(sub[len('sub-'):])
    sub_path = os.path.join(derivatives_root, sub, 'eeg/')
    print(sub_path)
    for file in sorted(os.listdir(sub_path)):
        # Match on the extension so names that merely contain '.set' are skipped.
        if not file.endswith('.set'):
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        sampling_freq = raw.info['sfreq']
        # Transpose so that time runs along axis 0, as resample_time_series expects.
        data = raw.get_data().T
        print("Raw data shape ", data.shape)
        data = resample_time_series(data, sampling_freq, SAMPLE_RATE)
        print("Downsampling and segmented data shape ", data.shape)
        np.save(feature_path + '/feature_{:02d}.npy'.format(sub_id), data)
    # One separator per processed subject (not for unrelated directory entries).
    print("-------------------------------------\n")

ADFTD-RS/derivatives/sub-001\eeg/
Raw data shape  (299900, 19)
Downsampling and segmented data shape  (76774, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-002\eeg/
Raw data shape  (396550, 19)
Downsampling and segmented data shape  (101516, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-003\eeg/
Raw data shape  (153050, 19)
Downsampling and segmented data shape  (39180, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-004\eeg/
Raw data shape  (353050, 19)
Downsampling and segmented data shape  (90380, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-005\eeg/
Raw data shape  (402050, 19)
Downsampling and segmented data shape  (102924, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-006\eeg/
Raw data shape  (316200, 19)
Downsampling and segmented data shape  (80947, 19)
-------------------------------------

ADFTD-RS/derivatives/sub-007\eeg/
Raw data shape  (383210, 19)
Downsampling and 

In [14]:
# Test the saved npy files: print the shape of each one and the total number
# of rows (axis 0) across all files.

path = feature_path

total_length = 0
# Sorted so the printed shapes follow the file names (feature_01, feature_02, ...).
for file in sorted(os.listdir(path)):
    # Skip anything in the folder that is not a saved feature array.
    if not file.endswith('.npy'):
        continue
    sub_path = os.path.join(path, file)
    # Load each file once and reuse its shape for both the print and the sum.
    feature_shape = np.load(sub_path).shape
    print(feature_shape)
    total_length += feature_shape[0]
print("\nTotal length:", total_length)

(76774, 19)
(101516, 19)
(39180, 19)
(90380, 19)
(102924, 19)
(80947, 19)
(98101, 19)
(101555, 19)
(78412, 19)
(164060, 19)
(98549, 19)
(113090, 19)
(107276, 19)
(119608, 19)
(115376, 19)
(124462, 19)
(107955, 19)
(108172, 19)
(117583, 19)
(111270, 19)
(117795, 19)
(105205, 19)
(106846, 19)
(97254, 19)
(87598, 19)
(115025, 19)
(105753, 19)
(104721, 19)
(94295, 19)
(70922, 19)
(147297, 19)
(102115, 19)
(90293, 19)
(124147, 19)
(94853, 19)
(107694, 19)
(99468, 19)
(114099, 19)
(108810, 19)
(123614, 19)
(113420, 19)
(123266, 19)
(106124, 19)
(112844, 19)
(109009, 19)
(96389, 19)
(103296, 19)
(126704, 19)
(99865, 19)
(104657, 19)
(96427, 19)
(97292, 19)
(99906, 19)
(107215, 19)
(104268, 19)
(101288, 19)
(101952, 19)
(96867, 19)
(100751, 19)
(95805, 19)
(102819, 19)
(115033, 19)
(103032, 19)
(108556, 19)
(113164, 19)
(70369, 19)
(82316, 19)
(73356, 19)
(81420, 19)
(61324, 19)
(79219, 19)
(84149, 19)
(109386, 19)
(129856, 19)
(95767, 19)
(104806, 19)
(89489, 19)
(111475, 19)
(104535, 19)
(11