In [13]:
from pyedflib import highlevel
import pyedflib as plib
import numpy as np
import matplotlib.pyplot as plt
import statistics as stat

def read_file_signal(file_path, index):
    """Read a single channel from an EDF file.

    Args:
        file_path: Path of the EDF file to open.
        index: Zero-based index of the signal (channel) to read.

    Returns:
        The value returned by ``EdfReader.readSignal(index)`` for that channel.
    """
    f = plib.EdfReader(file_path)
    try:
        # try/finally guarantees the file handle is released even when
        # readSignal raises (for example on an invalid channel index).
        return f.readSignal(index)
    finally:
        f.close()

def signal_labels(file_path):
    """Return the channel labels stored in an EDF file.

    Args:
        file_path: Path of the EDF file to open.

    Returns:
        The value returned by ``EdfReader.getSignalLabels()``; the caller in
        this notebook indexes and assigns into it like a list.
    """
    f = plib.EdfReader(file_path)
    try:
        # try/finally guarantees the file handle is released even when
        # reading the header labels raises.
        return f.getSignalLabels()
    finally:
        f.close()

def combined_signal(file_list, index):
    """Concatenate one channel across several EDF files.

    Args:
        file_list: Iterable of EDF file paths, read in the given order.
        index: Zero-based channel index read from every file.

    Returns:
        A single array produced by ``np.concatenate`` over the per-file
        signals, in ``file_list`` order.
    """
    per_file_signals = [read_file_signal(path, index) for path in file_list]
    return np.concatenate(per_file_signals)

def plot_signal(label, signal):
    """Draw ``signal`` on a new 20x10 figure and display it.

    Args:
        label: Text used as the prefix of the plot title.
        signal: Sequence of values passed straight to ``plt.plot``; the
            x axis is therefore the sample position, labelled "time".
    """
    plt.figure(figsize=(20, 10))
    plt.plot(signal)
    # Axis annotations are independent of each other; no y-limits are applied.
    plt.ylabel("amplitude")
    plt.xlabel("time")
    plt.title(f"{label} signal")
    plt.show()

def thresholding(signal, threshold):
    """Replace outliers in ``signal`` with the signal mean.

    A value is kept when it lies within ``threshold`` standard deviations
    (population standard deviation, as computed by ``np.std``) of the mean,
    bounds included; every other value is replaced by the mean.

    Args:
        signal: Array-like of numeric samples. Plain sequences such as lists
            are accepted as well as NumPy arrays.
        threshold: Number of standard deviations defining the accepted band.

    Returns:
        A new array of the same shape as ``signal``; the input is not
        modified.
    """
    # Convert once so that lists/tuples work too (they have no .mean()).
    values = np.asanyarray(signal)
    mean = values.mean()
    std = np.std(values)
    lower_limit = mean - threshold * std
    upper_limit = mean + threshold * std
    within_band = (values >= lower_limit) & (values <= upper_limit)
    return np.where(within_band, values, mean)

def resample_signal(signal, num_samples):
    """Resample ``signal`` to ``num_samples`` points by linear interpolation.

    The output positions are spread evenly from the first to the last
    sample of the input, so both end points are preserved.

    Args:
        signal: One-dimensional sequence of samples.
        num_samples: Number of points in the resampled output.

    Returns:
        Array of length ``num_samples`` produced by ``np.interp``.
    """
    sample_count = len(signal)
    last_index = sample_count - 1
    source_positions = np.linspace(0, last_index, num=sample_count)
    target_positions = np.linspace(0, last_index, num=num_samples)
    return np.interp(target_positions, source_positions, signal)

def mean_of_interval(signal, start, end):
    """Return the arithmetic mean of ``signal[start:end]``.

    Args:
        signal: Sliceable sequence of numbers.
        start: Inclusive start index of the slice.
        end: Exclusive end index of the slice.

    Returns:
        The result of ``statistics.mean`` over the slice.
    """
    window = signal[start:end]
    return stat.mean(window)

# Parts [001]..[010] of recording 00000349-297469, used in this notebook as
# the "severe" case. Note the lower-case drive letter in these paths.
edf_files_severe = [
    f"c:\\Users\\anany\\Downloads\\00000349-297469[{part:03d}].edf"
    for part in range(1, 11)
]

# Parts [001]..[011] of recording 00000338-297469, used in this notebook as
# the "moderate" case.
edf_files_moderate = [
    f"C:\\Users\\anany\\Downloads\\00000338-297469[{part:03d}].edf"
    for part in range(1, 12)
]

# Parts [001]..[011] of recording 00000336-297469, used in this notebook as
# the "mild" case.
edf_files_mild = [
    f"C:\\Users\\anany\\Downloads\\00000336-297469[{part:03d}].edf"
    for part in range(1, 12)
]

# Channel labels are taken from the first "severe" EDF part only.
# Entries 16, 17 and 23 are overwritten with numbered names so the three
# flow channels can be told apart (their original labels are not shown in
# this notebook -- presumably identical; verify against the file header).
labels = signal_labels(edf_files_severe[0])
labels[16], labels[17], labels[23] = "Flow patient 1", "Flow patient 2", "Flow patient 3"
print(labels)

['EEG F3-A2', 'EEG F4-A1', 'EEG A1-A2', 'EEG C3-A2', 'EEG C4-A1', 'EEG O1-A2', 'EEG O2-A1', 'EOG LOC-A2', 'EOG ROC-A1', 'EMG Chin', 'Leg 1', 'Leg 2', 'ECG I', 'RR', 'ECG II', 'Snore', 'Flow patient 1', 'Flow patient 2', 'Effort THO', 'Effort ABD', 'SpO2', 'Pleth', 'Body', 'Flow patient 3', 'xPAP CPAP', 'xPAP IPAP', 'xPAP EPAP', 'Leak Total', 'PulseRate', 'PressCheck', 'ECG IIHF', 'Technical']


In [14]:
# Channels 0-9 except channel 3 (listed as 'EEG C3-A2' in the labels printed
# from the first severe file); used by read_edf_signals in later cells.
selected_indices = [channel for channel in range(10) if channel != 3]

In [None]:
# Sanity check: read channels 8 and 9 from the first "severe" part and
# compare their lengths.
# NOTE(review): the moderate/mild cells read channels 9 and 10 instead --
# confirm which pair is intended.
signal_11, signal_12 = (
    read_file_signal(edf_files_severe[0], channel) for channel in (8, 9)
)
print(len(signal_11), len(signal_12))

720000 720000


In [16]:
# Sanity check: read channels 9 and 10 from the first "moderate" part and
# compare their lengths.
signal_21, signal_22 = (
    read_file_signal(edf_files_moderate[0], channel) for channel in (9, 10)
)
print(len(signal_21), len(signal_22))

720000 720000


In [17]:
# Sanity check: read channels 9 and 10 from the first "mild" part and
# compare their lengths.
signal_31, signal_32 = (
    read_file_signal(edf_files_mild[0], channel) for channel in (9, 10)
)
print(len(signal_31), len(signal_32))

720000 720000


In [18]:
# Channels to load from every "severe" EDF part (same values as
# selected_indices defined earlier in the notebook).
channels_of_interest = [0, 1, 2, 4, 5, 6, 7, 8, 9]
channel_data = {ch: [] for ch in channels_of_interest}
sample_rates = {}

# Record the sample rate of each channel, taken from the first part only.
# try/finally releases the file handle even if a header query raises.
edf = plib.EdfReader(edf_files_severe[0])
try:
    for ch in channels_of_interest:
        sample_rates[ch] = edf.getSampleFrequency(ch)
finally:
    edf.close()

# Read every channel of interest from every part, in file order.
for file in edf_files_severe:
    f = plib.EdfReader(file)
    try:
        for ch in channels_of_interest:
            signal = f.readSignal(ch)
            channel_data[ch].append(signal)
    finally:
        # Close the reader even when a read fails part-way through.
        f.close()

# Replace each per-channel list of chunks with one contiguous array.
for ch in channels_of_interest:
    channel_data[ch] = np.concatenate(channel_data[ch])

# Result:
# - `channel_data[ch]` is the signal of channel `ch` concatenated over all parts
# - `sample_rates[ch]` is the sample rate of channel `ch` in the first part

print("Sample rates:", sample_rates)
print("Signal shape for channel 0:", channel_data[0].shape)

Sample rates: {0: 200.0, 1: 200.0, 2: 200.0, 4: 200.0, 5: 200.0, 6: 200.0, 7: 200.0, 8: 200.0, 9: 200.0}
Signal shape for channel 0: (6922200,)


In [19]:
import numpy as np
import xml.etree.ElementTree as ET
import pyedflib
from utils import main_pipeline  # Assuming you have a main_pipeline function defined in utils.py

def read_edf_signals(edf_paths, selected_indices):
    """Load selected channels from several EDF files and join them in time.

    Args:
        edf_paths: Sequence of EDF file paths, read in the given order.
        selected_indices: Channel indices to read from every file. All
            selected channels must yield signals of equal length within a
            file so that they can be stacked into a 2-D array.

    Returns:
        A tuple ``(combined_signals, fs)`` where ``combined_signals`` has one
        row per selected channel and the files concatenated along axis 1,
        and ``fs`` is the sample frequency of channel 0 of the last file read,
        truncated to ``int``. It is assumed (not checked) that every selected
        channel in every file shares that rate.

    Raises:
        ValueError: If ``edf_paths`` is empty.
    """
    edf_paths = list(edf_paths)
    if not edf_paths:
        # np.concatenate would raise ValueError anyway; fail early with a
        # message that names the actual problem.
        raise ValueError("edf_paths must contain at least one EDF file")

    combined_signals = []

    for edf_path in edf_paths:
        f = pyedflib.EdfReader(edf_path)
        try:
            fs = int(f.getSampleFrequency(0))  # assuming all selected have same fs
            signals = [f.readSignal(i) for i in selected_indices]
        finally:
            # Use the public close(), as the other readers in this notebook
            # do, and release the handle even if a read fails.
            f.close()

        combined_signals.append(np.array(signals))

    # Concatenate along time axis (i.e. axis=1)
    combined_signals = np.concatenate(combined_signals, axis=1)
    return combined_signals, fs


def parse_rml_to_labels_binary(rml_path, total_duration_secs, epoch_secs=30):
    """Build one Wake/Sleep label per epoch from the stages in an RML file.

    Every ``Stage`` element found under ``.//User/Stage`` contributes its
    ``Type`` and integer ``Start`` attribute. A stage lasts until the next
    stage starts (or until ``total_duration_secs`` for the last one) and
    yields ``duration // epoch_secs`` copies of its label.

    Args:
        rml_path: Path of the RML (XML) annotation file.
        total_duration_secs: Length of the recording in seconds. No label is
            produced for time beyond this point.
        epoch_secs: Epoch length in seconds (default 30).

    Returns:
        1-D NumPy array with 0 for ``Wake`` and 1 for every sleep stage.

    Raises:
        KeyError: If a stage ``Type`` is not one of Wake, NonREM1, NonREM2,
            NonREM3 or REM.
    """
    root = ET.parse(rml_path).getroot()

    # (type, start) pairs ordered by start time.
    stages = sorted(
        ((stage.get("Type"), int(stage.get("Start")))
         for stage in root.findall(".//User/Stage")),
        key=lambda item: item[1],
    )

    # Binary mapping: Wake=0, all else=1
    stage_mapping = {
        'Wake': 0,
        'NonREM1': 1,
        'NonREM2': 1,
        'NonREM3': 1,
        'REM': 1
    }

    labels = []
    for i, (stage_type, start) in enumerate(stages):
        label = stage_mapping[stage_type]
        next_start = stages[i + 1][1] if i + 1 < len(stages) else total_duration_secs
        # Clip to the recording length: without this, stages annotated past
        # the end of the signal produced more labels than there are epochs.
        end = min(next_start, total_duration_secs)
        num_epochs = max(0, int((end - start) // epoch_secs))
        labels.extend([label] * num_epochs)

    return np.array(labels)

def parse_rml_to_labels_multi(rml_path, total_duration_secs, epoch_secs=30):
    """Build one five-class sleep-stage label per epoch from an RML file.

    Every ``Stage`` element found under ``.//User/Stage`` contributes its
    ``Type`` and integer ``Start`` attribute. A stage lasts until the next
    stage starts (or until ``total_duration_secs`` for the last one) and
    yields ``duration // epoch_secs`` copies of its label.

    Args:
        rml_path: Path of the RML (XML) annotation file.
        total_duration_secs: Length of the recording in seconds. No label is
            produced for time beyond this point.
        epoch_secs: Epoch length in seconds (default 30).

    Returns:
        1-D NumPy array with Wake=0, NonREM1=1, NonREM2=2, NonREM3=3, REM=4.

    Raises:
        KeyError: If a stage ``Type`` is not one of the five mapped names.
    """
    root = ET.parse(rml_path).getroot()

    # (type, start) pairs ordered by start time.
    stages = sorted(
        ((stage.get("Type"), int(stage.get("Start")))
         for stage in root.findall(".//User/Stage")),
        key=lambda item: item[1],
    )

    # Multi-class mapping: one distinct integer per sleep stage
    stage_mapping = {
        'Wake': 0,
        'NonREM1': 1,
        'NonREM2': 2,
        'NonREM3': 3,
        'REM': 4
    }

    labels = []
    for i, (stage_type, start) in enumerate(stages):
        label = stage_mapping[stage_type]
        next_start = stages[i + 1][1] if i + 1 < len(stages) else total_duration_secs
        # Clip to the recording length: without this, stages annotated past
        # the end of the signal produced more labels than there are epochs.
        end = min(next_start, total_duration_secs)
        num_epochs = max(0, int((end - start) // epoch_secs))
        labels.extend([label] * num_epochs)

    return np.array(labels)


# Channels to keep come from `selected_indices` (0,1,2,4,5,6,7,8,9), defined in an earlier cell.

# Read the selected channels from all "severe" EDF parts and join them along time
combined_signals, fs = read_edf_signals(edf_files_severe, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\user_severe_349.rml"

# Parse RML to get binary (Wake=0 / Sleep=1) labels, one per 30-second epoch
combined_labels = parse_rml_to_labels_binary(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
# (only the signals are trimmed here; the labels are left as parsed)
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "binary" mode
main_pipeline(combined_signals, combined_labels, fs, "binary")


Preprocessing signals...
Preprocessed signals shape: (9, 6918000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1153, 54000)
Number of labels: 1153
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1153
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1153
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1153, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.00 seconds.
Evaluating SVM...
Confusion Matrix:
[[  2   9]
 [  0 220]]

Classification Report:
              precision    recall  f1-score   support

        Wake       1.00      0.18      0.31        11
       Sleep       0.96      1

In [20]:
# Re-read the selected channels from all "severe" EDF parts (the previous cell trimmed combined_signals)
combined_signals, fs = read_edf_signals(edf_files_severe, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\user_severe_349.rml"

# Parse RML to get multi-class labels (Wake=0, NonREM1=1, NonREM2=2, NonREM3=3, REM=4)
combined_labels = parse_rml_to_labels_multi(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "multiclass" mode
main_pipeline(combined_signals, combined_labels, fs, "multiclass")

Preprocessing signals...
Preprocessed signals shape: (9, 6918000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1153, 54000)
Number of labels: 1153
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1153
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1153
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1153, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.02 seconds.
Evaluating SVM...
Confusion Matrix:
[[ 3  0  1  4  3]
 [ 0  0  6  0 10]
 [ 0  0  8 30 18]
 [ 0  0  5 69  5]
 [ 0  0  5 14 50]]

Classification Report:
              precision    recall  f1-score   support

        Wake    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
# Read the selected channels from all "moderate" EDF parts and join them along time
combined_signals, fs = read_edf_signals(edf_files_moderate, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\moderate_338.rml"

# Parse RML to get binary (Wake=0 / Sleep=1) labels, one per 30-second epoch
combined_labels = parse_rml_to_labels_binary(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "binary" mode
main_pipeline(combined_signals, combined_labels, fs, "binary")

Preprocessing signals...
Preprocessed signals shape: (9, 7230000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1205, 54000)
Number of labels: 1205
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1205
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1205
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1205, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.01 seconds.
Evaluating SVM...
Confusion Matrix:
[[ 40  17]
 [  3 181]]

Classification Report:
              precision    recall  f1-score   support

        Wake       0.93      0.70      0.80        57
       Sleep       0.91      0

In [22]:
# Re-read the selected channels from all "moderate" EDF parts (the previous cell trimmed combined_signals)
combined_signals, fs = read_edf_signals(edf_files_moderate, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\moderate_338.rml"

# Parse RML to get multi-class labels (Wake=0, NonREM1=1, NonREM2=2, NonREM3=3, REM=4)
combined_labels = parse_rml_to_labels_multi(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "multiclass" mode
main_pipeline(combined_signals, combined_labels, fs, "multiclass")

Preprocessing signals...
Preprocessed signals shape: (9, 7230000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1205, 54000)
Number of labels: 1205
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1205
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1205
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1205, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.02 seconds.
Evaluating SVM...
Confusion Matrix:
[[44 11  1  1  0]
 [ 6 42 17  1  0]
 [ 1 25 18  8  0]
 [ 0  3  5 27  0]
 [ 0 29  2  0  0]]

Classification Report:
              precision    recall  f1-score   support

        Wake    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Read the selected channels from all "mild" EDF parts and join them along time
combined_signals, fs = read_edf_signals(edf_files_mild, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\user_mild_336.rml"

# Parse RML to get binary (Wake=0 / Sleep=1) labels, one per 30-second epoch
combined_labels = parse_rml_to_labels_binary(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "binary" mode
main_pipeline(combined_signals, combined_labels, fs, "binary")

Preprocessing signals...
Preprocessed signals shape: (9, 7458000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1243, 54000)
Number of labels: 1243
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1243
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1243
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1243, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.01 seconds.
Evaluating SVM...
Confusion Matrix:
[[ 31  38]
 [  3 177]]

Classification Report:
              precision    recall  f1-score   support

        Wake       0.91      0.45      0.60        69
       Sleep       0.82      0

In [25]:
# Re-read the selected channels from all "mild" EDF parts (the previous cell trimmed combined_signals)
combined_signals, fs = read_edf_signals(edf_files_mild, selected_indices)

# Total recording length in whole seconds (samples per channel // sample rate)
total_duration_secs = combined_signals.shape[1] // fs

# Sleep-stage annotation (RML) file for this recording
rml_file_path = "C:\\Users\\anany\\Desktop\\OSA\\user_mild_336.rml"

# Parse RML to get multi-class labels (Wake=0, NonREM1=1, NonREM2=2, NonREM3=3, REM=4)
combined_labels = parse_rml_to_labels_multi(rml_file_path, total_duration_secs)

# Trim the signals to exactly len(combined_labels) epochs of 30 s each
required_samples = len(combined_labels) * 30 * fs
combined_signals = combined_signals[:, :required_samples]

# Run the pipeline imported from utils in "multiclass" mode
main_pipeline(combined_signals, combined_labels, fs, "multiclass")

Preprocessing signals...
Preprocessed signals shape: (9, 7458000)
Preprocessing completed.
Segmentation started...
Segmented signals shape: (1243, 54000)
Number of labels: 1243
Feature extraction started...
Extracting features...
Ensuring consistent feature dimensions...
Number of feature dictionaries: 1243
Feature extraction completed.
Normalizing features...
Number of normalized feature dictionaries: 1243
Feature normalization completed.
Converting features to matrix...
Feature matrix shape: (1243, 151)
Feature conversion completed.
Filtering features based on mutual information...
Feature filtering completed.
Performing mRMR feature selection...
mRMR feature selection completed.
Training and evaluating SVM...
Training SVM...
SVM training completed in 0.02 seconds.
Evaluating SVM...
Confusion Matrix:
[[49 15  5  0  0]
 [ 3 26 13  0  0]
 [ 1 17 40  8  0]
 [ 1  1 25 15  0]
 [ 4 25  1  0  0]]

Classification Report:
              precision    recall  f1-score   support

        Wake    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
