In [2]:
from pyedflib import highlevel
import pyedflib as plib
import numpy as np
import matplotlib.pyplot as plt
import statistics as stat

def read_file_signal(file_path, index):
    """Read one signal from an EDF file.

    Args:
        file_path: Path of the EDF file to open.
        index: Position of the signal (channel) to read.

    Returns:
        Whatever ``EdfReader.readSignal(index)`` returns for that channel.

    Raises:
        Any exception raised while opening or reading the file; the reader
        is closed before the exception propagates.
    """
    f = plib.EdfReader(file_path)
    try:
        return f.readSignal(index)
    finally:
        # Always release the file handle, even when readSignal fails
        # (e.g. for an out-of-range channel index).
        f.close()

def signal_labels(file_path):
    """Return the signal labels stored in an EDF file.

    Args:
        file_path: Path of the EDF file to open.

    Returns:
        Whatever ``EdfReader.getSignalLabels()`` returns for the file.

    Raises:
        Any exception raised while opening or reading the file; the reader
        is closed before the exception propagates.
    """
    f = plib.EdfReader(file_path)
    try:
        return f.getSignalLabels()
    finally:
        # Always release the file handle, even if reading the header fails.
        f.close()

def combined_signal(file_list, index):
    """Read channel ``index`` from every file and join the pieces end to end.

    Files are read in the order given by ``file_list`` and the per-file
    signals are concatenated in that same order.
    """
    per_file = [read_file_signal(path, index) for path in file_list]
    return np.concatenate(per_file)

def plot_signal(label, signal):
    """Draw ``signal`` as a line plot in a new 20x10 figure and show it.

    The figure is titled "<label> signal"; the axes are labelled "time"
    and "amplitude".
    """
    heading = f"{label} signal"
    plt.figure(figsize=(20, 10))
    plt.plot(signal)
    plt.ylabel("amplitude")
    plt.xlabel("time")
    plt.title(heading)
    plt.show()

def thresholding(signal, threshold):
    """Replace outliers in ``signal`` with the signal's mean.

    A sample is kept when it lies within ``threshold`` standard deviations
    of the mean (bounds inclusive); every other sample is replaced by the
    mean of the whole signal.

    Args:
        signal: Array-like of numbers (ndarray, list, tuple, ...).
        threshold: Half-width of the accepted band, in standard deviations.

    Returns:
        A new numpy array with the same shape as ``signal``.
    """
    # Accept plain sequences as well as ndarrays; this is a no-op for ndarrays.
    samples = np.asarray(signal)
    mean = samples.mean()
    std = np.std(samples)
    lower_limit = mean - threshold * std
    upper_limit = mean + threshold * std
    in_band = (samples >= lower_limit) & (samples <= upper_limit)
    return np.where(in_band, samples, mean)

def resample_signal(signal, num_samples):
    """Linearly interpolate ``signal`` onto ``num_samples`` evenly spaced points.

    The new sample positions span the original index range
    ``0 .. len(signal) - 1``, end points included.
    """
    n_source = len(signal)
    # Positions of the existing samples: 0.0, 1.0, ..., n_source - 1.
    source_positions = np.arange(n_source, dtype=float)
    target_positions = np.linspace(0, n_source - 1, num=num_samples)
    return np.interp(target_positions, source_positions, signal)

def mean_of_interval(signal, start, end):
    """Return the arithmetic mean of ``signal[start:end]`` (end exclusive)."""
    window = signal[start:end]
    return stat.mean(window)
# EDF segment files for each recording, numbered [001], [002], ...
# Recording 349 is the severe case, 336 is the mild case.
edf_files_1 = [  # severe
    f"c:\\Users\\anany\\Downloads\\00000349-297469[{part:03d}].edf"
    for part in range(1, 11)
]

edf_files_2 = [  # normal
    f"C:\\Users\\anany\\Downloads\\00000344-297469[{part:03d}].edf"
    for part in range(1, 9)
]

edf_files_3 = [  # moderate
    f"C:\\Users\\anany\\Downloads\\00000338-297469[{part:03d}].edf"
    for part in range(1, 12)
]

edf_files_4 = [  # mild
    f"C:\\Users\\anany\\Downloads\\00000336-297469[{part:03d}].edf"
    for part in range(1, 12)
]

# Channel names come from the first severe segment; later cells use them
# as CSV file names for every recording.
labels = signal_labels(edf_files_1[0])
# Rename the three flow channels so each has its own name
# (presumably the raw labels collide -- TODO confirm against the EDF header).
labels[16], labels[17], labels[23] = "Flow patient 1", "Flow patient 2", "Flow patient 3"
print(labels)

['EEG F3-A2', 'EEG F4-A1', 'EEG A1-A2', 'EEG C3-A2', 'EEG C4-A1', 'EEG O1-A2', 'EEG O2-A1', 'EOG LOC-A2', 'EOG ROC-A1', 'EMG Chin', 'Leg 1', 'Leg 2', 'ECG I', 'RR', 'ECG II', 'Snore', 'Flow patient 1', 'Flow patient 2', 'Effort THO', 'Effort ABD', 'SpO2', 'Pleth', 'Body', 'Flow patient 3', 'xPAP CPAP', 'xPAP IPAP', 'xPAP EPAP', 'Leak Total', 'PulseRate', 'PressCheck', 'ECG IIHF', 'Technical']


# severe

In [3]:
import xml.etree.ElementTree as ET

# Load the RML (XML) annotation file of the severe recording.
file_path = 'C:\\Users\\anany\\Desktop\\OSA\\user_severe_349.rml'
tree = ET.parse(file_path)
root = tree.getroot()

# (Type, Start) attribute pairs of every Stage element under a User element,
# in document order. Both values are kept as the raw attribute strings.
sleep_stages_severe_user = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//User/Stage')
]

# Same extraction for the Stage elements under a Machine element.
sleep_stages_severe_machine = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//Machine/Stage')
]

def get_sleep_stage_at_time(sleep_stages, time):
    """Return the stage type in effect at ``time``.

    ``sleep_stages`` is a sequence of ``(stage_type, start_time)`` pairs;
    ``start_time`` is converted with ``int()`` before comparison. The scan
    stops at the first entry that starts after ``time``, so entries are
    expected in ascending start order. Returns ``None`` when no entry has
    started by ``time``.
    """
    active = None
    for label, onset in sleep_stages:
        if not (int(onset) <= time):
            break
        active = label
    return active

In [5]:
import os

import pandas as pd

# Export every channel of the severe recording to ./severe_csvs/<label>.csv,
# annotated with the user and machine sleep stages.
NUM_SAMPLES = 36000  # every channel is resampled to this many points
out_dir = "./severe_csvs"
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing directories

# All channels share the same resampled length, so the time axis and the
# per-sample stage lookups are identical for every channel: build them once.
# NOTE(review): the sample index is passed to the stage lookup as the time;
# this assumes the Start offsets use the same unit as the resampled index -- confirm.
time = np.linspace(0, NUM_SAMPLES - 1, num=NUM_SAMPLES)
sleep_stage_user = [get_sleep_stage_at_time(sleep_stages_severe_user, int(t)) for t in time]
sleep_stage_machine = [get_sleep_stage_at_time(sleep_stages_severe_machine, int(t)) for t in time]

for i, label in enumerate(labels):
    signal = combined_signal(edf_files_1, i)
    resampled_signal = resample_signal(signal, NUM_SAMPLES)
    thresholded_signal = thresholding(resampled_signal, 2)
    df = pd.DataFrame({'time': time, 'signal': thresholded_signal, 'user sleep stage': sleep_stage_user, 'machine sleep stage': sleep_stage_machine})
    df.to_csv(f"{out_dir}/{label}.csv", index=False)


In [20]:
# Spot-check: load one CSV from ./severe_csvs back in and show its first rows.
df = pd.read_csv("./severe_csvs/Flow patient 1.csv")
df.head()

Unnamed: 0,time,signal,user sleep stage,machine sleep stage
0,0.0,0.001526,Wake,Wake
1,1.0,-9.015111,Wake,Wake
2,2.0,-5.475272,Wake,Wake
3,3.0,-1.477687,Wake,Wake
4,4.0,-1.318615,Wake,Wake


# mild

In [6]:
import xml.etree.ElementTree as ET

# Load the RML (XML) annotation file of the mild recording.
file_path = 'C:\\Users\\anany\\Desktop\\OSA\\user_mild_336.rml'
tree = ET.parse(file_path)
root = tree.getroot()

# (Type, Start) attribute pairs of every Stage element under a User element,
# in document order. Both values are kept as the raw attribute strings.
sleep_stages_mild_user = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//User/Stage')
]

# Same extraction for the Stage elements under a Machine element.
sleep_stages_mild_machine = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//Machine/Stage')
]

def get_sleep_stage_at_time(sleep_stages, time):
    """Return the stage type in effect at ``time``.

    ``sleep_stages`` is a sequence of ``(stage_type, start_time)`` pairs;
    ``start_time`` is converted with ``int()`` before comparison. The scan
    stops at the first entry that starts after ``time``, so entries are
    expected in ascending start order. Returns ``None`` when no entry has
    started by ``time``.
    """
    active = None
    for label, onset in sleep_stages:
        if not (int(onset) <= time):
            break
        active = label
    return active

In [7]:
import os

import pandas as pd

# Export every channel of the mild recording to ./mild_csvs/<label>.csv,
# annotated with the user and machine sleep stages.
NUM_SAMPLES = 36000  # every channel is resampled to this many points
out_dir = "./mild_csvs"
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing directories

# All channels share the same resampled length, so the time axis and the
# per-sample stage lookups are identical for every channel: build them once.
# NOTE(review): the sample index is passed to the stage lookup as the time;
# this assumes the Start offsets use the same unit as the resampled index -- confirm.
time = np.linspace(0, NUM_SAMPLES - 1, num=NUM_SAMPLES)
sleep_stage_user = [get_sleep_stage_at_time(sleep_stages_mild_user, int(t)) for t in time]
sleep_stage_machine = [get_sleep_stage_at_time(sleep_stages_mild_machine, int(t)) for t in time]

for i, label in enumerate(labels):
    signal = combined_signal(edf_files_4, i)
    resampled_signal = resample_signal(signal, NUM_SAMPLES)
    thresholded_signal = thresholding(resampled_signal, 2)
    df = pd.DataFrame({'time': time, 'signal': thresholded_signal, 'user sleep stage': sleep_stage_user, 'machine sleep stage': sleep_stage_machine})
    df.to_csv(f"{out_dir}/{label}.csv", index=False)


# moderate

In [6]:
import xml.etree.ElementTree as ET

# Load the RML (XML) annotation file of the moderate recording.
file_path = 'C:\\Users\\anany\\Desktop\\OSA\\moderate_338.rml'
tree = ET.parse(file_path)
root = tree.getroot()

# (Type, Start) attribute pairs of every Stage element under a User element,
# in document order. Both values are kept as the raw attribute strings.
sleep_stages_moderate_user = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//User/Stage')
]

# Same extraction for the Stage elements under a Machine element.
sleep_stages_moderate_machine = [
    (node.get('Type'), node.get('Start'))
    for node in root.findall('.//Machine/Stage')
]

def get_sleep_stage_at_time(sleep_stages, time):
    """Return the stage type in effect at ``time``.

    ``sleep_stages`` is a sequence of ``(stage_type, start_time)`` pairs;
    ``start_time`` is converted with ``int()`` before comparison. The scan
    stops at the first entry that starts after ``time``, so entries are
    expected in ascending start order. Returns ``None`` when no entry has
    started by ``time``.
    """
    active = None
    for label, onset in sleep_stages:
        if not (int(onset) <= time):
            break
        active = label
    return active

In [8]:
import os

import pandas as pd

# Export every channel of the moderate recording to ./moderate_csvs/<label>.csv,
# annotated with the user and machine sleep stages.
NUM_SAMPLES = 36000  # every channel is resampled to this many points
out_dir = "./moderate_csvs"
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing directories

# All channels share the same resampled length, so the time axis and the
# per-sample stage lookups are identical for every channel: build them once.
# NOTE(review): the sample index is passed to the stage lookup as the time;
# this assumes the Start offsets use the same unit as the resampled index -- confirm.
time = np.linspace(0, NUM_SAMPLES - 1, num=NUM_SAMPLES)
sleep_stage_user = [get_sleep_stage_at_time(sleep_stages_moderate_user, int(t)) for t in time]
sleep_stage_machine = [get_sleep_stage_at_time(sleep_stages_moderate_machine, int(t)) for t in time]

for i, label in enumerate(labels):
    # edf_files_3 is the moderate recording (338), matching moderate_338.rml.
    # This cell previously read edf_files_4, which is the mild recording (336).
    signal = combined_signal(edf_files_3, i)
    resampled_signal = resample_signal(signal, NUM_SAMPLES)
    thresholded_signal = thresholding(resampled_signal, 2)
    df = pd.DataFrame({'time': time, 'signal': thresholded_signal, 'user sleep stage': sleep_stage_user, 'machine sleep stage': sleep_stage_machine})
    df.to_csv(f"{out_dir}/{label}.csv", index=False)


In [9]:
import os

import pandas as pd

# Export every channel of the normal recording to ./normal_csvs/<label>.csv.
# No sleep-stage columns are written for this recording.
NUM_SAMPLES = 36000  # every channel is resampled to this many points
out_dir = "./normal_csvs"
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing directories

# Every channel is resampled to the same length, so one time axis serves all.
time = np.linspace(0, NUM_SAMPLES - 1, num=NUM_SAMPLES)

for i, label in enumerate(labels):
    signal = combined_signal(edf_files_2, i)
    resampled_signal = resample_signal(signal, NUM_SAMPLES)
    thresholded_signal = thresholding(resampled_signal, 2)
    df = pd.DataFrame({'time': time, 'signal': thresholded_signal})
    df.to_csv(f"{out_dir}/{label}.csv", index=False)
