In [1]:
# ONLY NEED TO RUN THIS SECTION ONCE

import numpy as np
import pandas as pd
import re
import datetime
import mne

# Reference workbooks, loaded without a header row so columns are numbered 0, 1, 2, ...
# gender_age is later indexed as: column 0 = record ID, column 1 = gender, column 2 = age.
gender_age = pd.read_excel(io='gender-age.xlsx', header=None)
all_signals = pd.read_excel(io='columns.xlsx', header=None)

In [2]:
# ALL INPUTS THAT NEED TO BE CHANGED ARE HERE

# Record ID of the individual to process. Download the matching <individual>.txt and
# <individual>.edf files and move them to the same folder as this notebook.
# The non-digit prefix of the ID (e.g. 'n' in 'n3') is used in Part 1 to pick the condition label.
individual = 'n3'

# How to find the study start time:
#   1. Open the data preview: https://archive.physionet.org/cgi-bin/atm/ATM
#   2. Under Input, set Database to "CAP Sleep Database (capslpdb)"
#      and set Record to the individual chosen above.
#   3. Set Toolbox to "Export Signals as CSV".
#   4. You should be able to see the start time of the study.

# Copy the start time components below (they are passed to datetime.datetime in Part 1,
# so the hour is on a 24-hour clock):

study_start_year = 2009
study_start_month = 1
study_start_day = 1
study_start_hour = 22
study_start_min = 15
study_start_second = 42

In [3]:
# PART 1 - RUN AFTER CHANGING INPUTS

# Defines gender, age, and condition for the individual.
# The demographic table is filtered once; column 0 holds the upper-case record ID,
# column 1 the gender and column 2 the age.
individual_rows = gender_age[gender_age[0] == individual.upper()]
if individual_rows.empty:
    raise ValueError(f"Individual {individual!r} was not found in gender-age.xlsx")
gender = individual_rows[1].tolist()[0]
age = individual_rows[2].tolist()[0]

# The condition code is the first run of non-digit characters in the record ID ('n3' -> 'n').
conditions = {'n':'Normal', 'nfle':'Nocturnal Frontal Lobe Epilepsy', 'ins':'Insomnia', 'plm':'Periodic Leg Movement',
             'rbd':'REM Behavior Disorder'}
code_match = re.search(r'\D+', individual)
condition_code = code_match.group() if code_match else ''
if condition_code not in conditions:
    raise ValueError(
        f"Individual {individual!r} has unsupported condition code {condition_code!r}; "
        f"supported codes: {sorted(conditions)}"
    )
condition = conditions[condition_code]

# Loads the annotation txt file and keeps only the rows of its most frequent 'Location'.
# pd.read_csv yields one text column; the first 17 rows are skipped so that the next row
# is the tab-separated column header (NOTE(review): the 17-row offset is hard-coded — confirm
# it holds for every record).
raw_lines = pd.read_csv(individual + '.txt')
annotation_rows = raw_lines[17:].iloc[:, 0].str.split('\t', expand=True)

# Promote the first remaining row to column names, then drop it from the data.
annotation_rows.columns = list(annotation_rows.iloc[0])
annotation_rows = annotation_rows.iloc[1:]

# The location with the most 'Event' entries is treated as the primary one.
events_per_location = annotation_rows.groupby('Location').count()['Event']
primary_loc = events_per_location.sort_values(ascending=False).index[0]
df2 = annotation_rows[annotation_rows['Location'] == primary_loc]

# Calculates number of waveform measurements between study start time and first txt timestamp.
# NOTE(review): the sampling rate is assumed to be 500 Hz for every record — confirm against the EDF header.
SAMPLING_RATE_HZ = 500

study_start = datetime.datetime(study_start_year, study_start_month, study_start_day,
                                study_start_hour, study_start_min, study_start_second)

# The txt file only stores a time of day, so place it on the study start date first ...
timestamp_comps = df2['Time [hh:mm:ss]'].iloc[0].split(":")
hour, minute, second = (int(part) for part in timestamp_comps)
first_timestamp = datetime.datetime.combine(study_start.date(), datetime.time(hour, minute, second))

# ... and roll over to the next calendar day if that would put it before the study start.
# Using timedelta (rather than day + 1) keeps this valid on the last day of a month or year.
if first_timestamp < study_start:
    first_timestamp += datetime.timedelta(days=1)
first_recording_day = first_timestamp.day

# total_seconds() covers the whole interval; .seconds would silently drop whole days.
lag = SAMPLING_RATE_HZ * int((first_timestamp - study_start).total_seconds())

# Adds epochs and features.
# assign() builds a new frame instead of writing columns onto the filtered slice, which
# avoids pandas' SettingWithCopyWarning; scalar values are broadcast to every row.
# Epochs are numbered 0..n-1 in row order (NOTE(review): this presumes the remaining rows are
# consecutive epochs with no gaps — confirm against the annotation file).
df2 = df2.assign(epoch=range(len(df2)), condition=condition, gender=gender, age=age)
df2 = df2[['epoch', 'Sleep Stage', 'condition', 'gender', 'age']]

df2

Unnamed: 0,epoch,Sleep Stage,condition,gender,age
18,0,W,Normal,F,35
19,1,W,Normal,F,35
20,2,S1,Normal,F,35
23,3,S1,Normal,F,35
24,4,S1,Normal,F,35
...,...,...,...,...,...
1367,994,W,Normal,F,35
1368,995,W,Normal,F,35
1369,996,W,Normal,F,35
1370,997,W,Normal,F,35


In [7]:
# PART 2 - RUN AFTER PART 1

# Length of one epoch in seconds.
EPOCH_SECONDS = 30

# Reads edf data, skipping the first `lag` samples so the signal starts at the first txt timestamp.
# NOTE(review): `lag` is computed in Part 1 assuming 500 Hz — confirm it matches edf.info['sfreq'].
edf = mne.io.read_raw_edf(individual + '.edf')
F = edf.get_data(start = lag)

# Defines list of signals in edf, normalising channel names that are spelled differently in some records.
channel_aliases = {'Dx1-DX2': 'DX1-DX2', 'SpO2': 'SAO2', 'F2-F4': 'Fp2-F4'}
signal = [channel_aliases.get(x, x) for x in edf.ch_names]

# One row per sample, one column per channel.
D = F.transpose()
df = pd.DataFrame(D, columns=signal)

# Tags every sample with its epoch number using integer sample counts: this is vectorised
# (no multi-million element Python list) and avoids floating-point rounding at epoch boundaries.
# The sampling rate is read from the EDF header instead of being hard-coded.
samples_per_epoch = int(round(edf.info['sfreq'] * EPOCH_SECONDS))
df['epoch'] = (np.arange(len(df)) // samples_per_epoch).astype(float)  # float dtype kept as before
df=df[['epoch','Fp2-F4', 'F4-C4', 'C4-P4', 'P4-O2','C4-A1','ROC-LOC','EMG1-EMG2','ECG1-ECG2','DX1-DX2','SX1-SX2', 'SAO2']]

df

Extracting EDF parameters from /Users/Ashok/OneDrive - Georgia Institute of Technology/Summer 2022/CS 4641/Project/n3.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


Unnamed: 0,epoch,Fp2-F4,F4-C4,C4-P4,P4-O2,C4-A1,ROC-LOC,EMG1-EMG2,ECG1-ECG2,DX1-DX2,SX1-SX2,SAO2
0,0.0,1.083639e-06,-8.089133e-07,-5.647131e-07,0.000004,-4.578755e-08,-0.000007,-1.607046e-06,0.000032,-1.831502e-07,3.663004e-07,98.020905
1,0.0,8.089133e-07,-1.984127e-07,-6.868132e-07,0.000004,1.205739e-06,-0.000007,-1.300942e-06,0.000035,-2.510804e-07,1.506773e-07,98.020905
2,0.0,4.426129e-07,3.510379e-07,-8.394383e-07,0.000003,3.250916e-06,-0.000006,-6.581234e-07,0.000038,-2.612076e-07,-1.280371e-07,98.020905
3,0.0,1.526252e-08,8.394383e-07,-9.615385e-07,0.000003,5.265568e-06,-0.000005,-1.300942e-06,0.000039,-1.943223e-07,-3.756840e-07,98.020905
4,0.0,-3.510379e-07,1.144689e-06,-9.920635e-07,0.000002,7.463370e-06,-0.000003,-4.423202e-06,0.000038,-6.105006e-08,-5.189255e-07,98.020905
...,...,...,...,...,...,...,...,...,...,...,...,...
15412227,1027.0,-3.228022e-05,9.355922e-06,8.928571e-06,0.000006,6.456044e-06,0.000087,-8.188280e-06,0.000019,4.445801e-05,2.640336e-05,98.020905
15412228,1027.0,-3.099817e-05,9.966422e-06,9.813797e-06,0.000006,7.432845e-06,0.000071,-3.504890e-06,0.000018,3.171551e-05,3.598901e-05,98.020905
15412229,1027.0,-2.999084e-05,1.054640e-05,1.069902e-05,0.000006,7.799145e-06,0.000042,-2.280474e-06,0.000017,1.406827e-05,6.701148e-05,98.020905
15412230,1027.0,-2.861722e-05,1.103480e-05,1.146215e-05,0.000007,8.012821e-06,0.000007,-8.096449e-06,0.000017,-3.302198e-06,1.136342e-04,98.020905


In [None]:
# Run this after feature extraction

# Left join: every sleep-stage row in df2 is kept and matched to the signal rows sharing its epoch.
final = df2.merge(df, on='epoch', how='left')
final