In [None]:
from psg import PSG
import matplotlib.pyplot as plt
from importlib import reload
import numpy as np
import pandas as pd
import constants
from ecg import ECG
from emg import EMG
import eeg
from eeg import EEG
from eog import EOG
from sao2 import *
from matplotlib import pyplot as plt
from scipy import stats
import gc

In [None]:
# Recording identifiers, split into training and testing groups.
training_set_1 = ['ins2', 'ins5', 'ins6', 'ins7', 'n11', 'n2']
training_set_2 = ['nfle10', 'nfle11', 'nfle12', 'nfle13', 'nfle14', 'nfle15']
training_set_3 = ['plm10', 'plm1', 'plm2', 'plm3', 'plm5', 'plm6']
training_set_4 = ['rbd10', 'rbd11', 'rbd12', 'rbd13', 'rbd16', 'rbd17']

testing_set_1 = ['ins8', 'ins9', 'n5']
testing_set_2 = ['nfle16', 'nfle17', 'nfle18', 'nfle19']
testing_set_3 = ['plm7', 'plm8', 'plm9']
testing_set_4 = ['rbd18', 'rbd19', 'rbd20']

# Make sure the edf and txt files for every recording, along with the GitHub
# modules, are available before running this cell.

# Pick up any edits made to eeg.py since the kernel started. One reload per
# cell run is enough; repeating it for every recording only adds overhead.
reload(eeg)

# Extract every feature family for each recording in the chosen set and export
# one CSV per recording. Change the list below to process a different set.
for i in training_set_2:
    psg_data = PSG(i)

    # EEG feature extraction
    EEG_columns = eeg.EEG(psg_data.data)
    EEG_columns.extract_features()
    eeg_data = EEG_columns.eeg_features

    # SAO2 feature extraction
    oxyData = psg_data.data[['epoch', 'SAO2']]
    sao2_data = SAO2(oxyData).get_SAO2_metrics()

    # EOG feature extraction
    eog_data = EOG(psg_data.data).get_EOG_metrics()

    # EMG feature extraction (SX1-SX2, DX1-DX2 and the default EMG1-EMG2 channel)
    sxDfInput = psg_data.data.loc[:, ["elapsed_seconds", "epoch", "SX1-SX2"]]
    sx = EMG(sxDfInput, signalType="SX1-SX2")
    sx_data = sx.getMetrics()

    dxDfInput = psg_data.data.loc[:, ["elapsed_seconds", "epoch", "DX1-DX2"]]
    dx = EMG(dxDfInput, signalType="DX1-DX2")
    dx_data = dx.getMetrics()

    emgDfInput = psg_data.data.loc[:, ["elapsed_seconds", "epoch", "EMG1-EMG2"]]
    emg_columns = EMG(emgDfInput)
    emg_data = emg_columns.getMetrics()

    # ECG feature extraction (ECG1-ECG2 and PLETH are both handled by the ECG class)
    ecgDfInput = psg_data.data.loc[:, ["epoch", "ECG1-ECG2"]]
    ecg_columns = ECG(ecgDfInput)
    ecg_data = ecg_columns.getMetrics()

    plethDfInput = psg_data.data.loc[:, ["elapsed_seconds", "epoch", "PLETH"]]
    pleth = ECG(plethDfInput, signalType='PLETH')
    pleth_data = pleth.getMetrics()

    # Combine: left-join every feature table onto the annotation table by
    # epoch, in the same order as before, so every annotated epoch is kept.
    final = psg_data.txtData
    for feature_table in (eeg_data, sao2_data, eog_data, sx_data,
                          dx_data, emg_data, ecg_data, pleth_data):
        final = pd.merge(final, feature_table, how='left', on='epoch')

    # The index is written on purpose: the encoding cell further down expects
    # the resulting "Unnamed: 0" column when the file is read back.
    final.to_csv(i+'.csv')
    print(i+" successfully exported!")


In [None]:
# Data Transformation - Box Cox followed by Min-Max Scaling
# NOTE: `i` is whatever value the feature-extraction loop left behind, so this
# cell only transforms the most recently exported recording.
final = pd.read_csv(i+'.csv')

# Discard rows where 'hf_PLETH' is missing before scaling.
final.dropna(subset = ['hf_PLETH'], inplace=True)

# Columns from position 5 onward are rescaled; the first five are left as read.
# NOTE(review): presumably those five are index/annotation columns - confirm.
for column in final.columns[5:]:
    magnitudes = final[column].abs()

    try:
        # boxcox returns (transformed_values, fitted_lambda); only the values are used.
        transformed = stats.boxcox(magnitudes)[0]
    except ValueError:
        # Box-Cox rejects data that is not strictly positive (e.g. zeros left
        # after abs()); fall back to scaling the absolute values directly.
        # Only ValueError is caught so that interrupts and unrelated bugs are
        # no longer silently swallowed.
        transformed = magnitudes

    # Standardise, then min-max scale. The standardisation step is kept
    # exactly as before so the stored values are unchanged.
    standardized = (transformed - transformed.mean()) / transformed.std()
    final[column] = (standardized - standardized.min()) / (standardized.max() - standardized.min())
        

In [None]:
# Binary encoding of the condition and ordinal encoding of the sleep stage.
# Drop 1st column (previous index from processing)
final.drop("Unnamed: 0", axis=1, inplace=True)

# Add a numeric sleep-stage column; labels without a mapping stay at 0.
final.insert(2, "sleep_stage", np.zeros(len(final)))

# Replace each sleep-stage string with its numerical value.
for stage, stage_code in constants.SLEEP_STAGES.items():
    final.loc[final["Sleep Stage"] == stage, "sleep_stage"] = stage_code

# Drop original sleep stage column
final.drop("Sleep Stage", axis=1, inplace=True)

# Only the first row is checked to determine the condition. The lookup is
# positional (.iloc) because rows may already have been dropped, in which case
# index label 0 no longer exists and a label lookup would raise KeyError.
first_condition = final["condition"].iloc[0]
binary = constants.CONDITION_TO_BINARY.get(first_condition)

# Expand the three binary digits into constant columns at positions 3, 4 and 5.
# If the condition has no entry in the table, no columns are added.
if binary is not None:
    for digit, column_name in enumerate(("condition_0", "condition_1", "condition_2")):
        fill = np.zeros if binary[digit] == 0 else np.ones
        final.insert(3 + digit, column_name, fill(len(final)))

# Drop original condition column
final.drop("condition", axis=1, inplace=True)