# AUTOMATIC TIME SEGMENTATION OF PHYSIOLOGICAL DATA BASED ON SPO2 STATE

## Defining main functions

In [1]:
import pandas as pd
import re
from scipy.signal import lfilter
from scipy.signal import find_peaks
import numpy as np
import neurokit2 as nk
import logging
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os
import openpyxl


### Data preparation

#### Loading file

In [2]:
def load_file(filename, dfmeta):
    df = pd.read_csv(filename)
    status, subject_id = re.search(r'(?P<cov_state>before|after)_(?P<subject_id>ID_\d{1,2})', filename).groups()

    logging.info(f'Status: {status}, Subject ID: {subject_id}')

    dfmeta.update({
        'filename':filename,
        'status':status,
        'subject_id':subject_id
    })    
    return df, dfmeta

#### Data preparation (renaming columns, adding index time, creating smoothed spo2, creating derivate lfilter)

##### Creating smoothed derivate (subfunction for data_prep)

In [3]:
def smooth_derivative(df):
    coefficients_L = [2 / (5 * 10), 1 / (5 * 10), 0, -1 / (5 * 10), -2 / (5 * 10)]
    df['spo2_deric'] = lfilter(coefficients_L, [1], df['spo2'])
    return df

##### Data prep function

In [4]:
import pandas as pd

def data_prep(df, dfmeta):
    time_factor = 5
    roll_w_size = 13
    column_rename_dict = {
        'Time': 'time',
        ' HR(bpm)': 'hr_bpm',
        ' SpO2(%)': 'spo2',
        ' PI(%)': 'pi',
        ' NIBP-S(mmHg)': 'bp_s',
        ' NIBP-D(mmHg)': 'bp_d',
        ' NIBP-M(mmHg)': 'bp_m',
        ' RR(rpm)': 'resp_rate',
        ' PR(bpm)': 'pulse_rate',
        ' PVCs(/min)': 'pvc_rate',
        ' Pauses(/min)': 'pause_rate',
        ' VPBs(/min)': 'VPB_rate',
        ' Couplets(/min)': 'couplet_rate',
        ' Missed beats(/min)': 'missed_beat_rate',
        ' R on Ts(/min)': 'r_on_t_rate',
        ' QT(ms)': 'qt',
        ' QTc(ms)': 'qtc',
        ' EtCO2(mmHg)': 'etco2_mmgh',
        ' FiCO2(mmHg)': 'fi_co2_mmhg',
        ' EtO2(%)': 'eto2_percent',
        ' FiO2(%)': 'fi_o2_percent'
    }

    # Rename columns
    df.rename(columns=column_rename_dict, inplace=True, errors='ignore')

    
    # Smooth derivative 
    #df = smooth_derivative(df)

    # Convert columns to numeric, replace errors with '--'
    columns_to_convert = ['hr_bpm', 'spo2', 'pi', 'bp_s', 'bp_d', 'bp_m', 'resp_rate', 
                          'pulse_rate', 'pvc_rate', 'pause_rate', 'VPB_rate', 
                          'couplet_rate', 'missed_beat_rate', 'r_on_t_rate', 
                          'qt', 'qtc', 'etco2_mmgh', 'fi_co2_mmhg', 'eto2_percent', 'fi_o2_percent']
    
    for col in columns_to_convert:
        if col in df.columns:
            # Replace '--' with NaN and assign back to the column
            df[col] = df[col].replace('--', np.nan)
            
            # Convert to numeric, coercing errors to NaN
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Apply rolling mean smoothing
    df['spo2_smooth'] = df['spo2'].rolling(window=roll_w_size).mean().shift(-(roll_w_size - 1) // 2)
    df['stime'] = df.index * time_factor

    
    # Update metadata
    dfmeta.update({
        'time_factor': time_factor,
        'roll_w_size': roll_w_size,
    })

    return df, dfmeta


### Hypoxic drop detection and segmentation

#### Drop adaptive threshold crossing

In [5]:
def drop_threshold(df, dfmeta):
    # Calculate adaptive threshold for drop
    drop_a_thres = dfmeta['a_control'] * 100 + (1 - dfmeta['a_control']) * df['spo2_smooth'].min(skipna=True)
    logging.info(f'drop adaptive threshold: {drop_a_thres:.2f}')

    try:
        # Try to find the drop threshold crossing index
        drop_event = nk.events_find(
            df['spo2'],
            threshold=drop_a_thres,
            threshold_keep='below',
            start_at=dfmeta['start_cutoff'],
            duration_min=dfmeta['min_duration']
        )

        drop_thresX = drop_event['onset'][0]
        drop_duration = drop_event['duration'][0]

        logging.info(f'drop threshold cross index: {drop_thresX}')
        dfmeta.update({
            'drop_a_thres': drop_a_thres,
            'drop_thresX': drop_thresX,
            'drop_duration': drop_duration,
        })
    
    except (IndexError, KeyError) as e:
        # Handle the case when no drop threshold is found
        logging.warning(f'No drop threshold found or failed to calculate: {e}')
        dfmeta.update({
            'drop_a_thres': drop_a_thres,
            'drop_thresX': None,  # Set to None when no event is found
            'drop_duration': None
        })

    return dfmeta


In [6]:
# def drop_threshold(df, dfmeta):
#     # Calculate adaptive threshold for drop
#     drop_a_thres = dfmeta['a_control'] * 100 + (1 - dfmeta['a_control']) * df['spo2_smooth'].min(skipna=True)
#     logging.info(f'drop adaptive threshold: {drop_a_thres:.2f}')

#     try:
#         # Original code
#         drop_event = nk.events_find(
#             df['spo2'],
#             threshold=drop_a_thres,
#             threshold_keep='below',
#             start_at=dfmeta['start_cutoff'],
#             duration_min=dfmeta['min_duration']
#         )

#         # Try to find the drop threshold crossing index
#         drop_list = nk.events_find(
#             df['spo2'],
#             threshold=drop_a_thres,
#             threshold_keep='below',
#             start_at=dfmeta['start_cutoff'],
#             duration_min=2
#         )

#         # Az indexek azonosítása, ahol duration > 15
#         drop_remove_index = [i for i, dur in enumerate(drop_list["duration"]) if dur > 50]

#         # Az indexek eltávolítása minden kulcsból
#         for key in drop_list.keys():
#             drop_list[key] = [value for i, value in enumerate(drop_list[key]) if i not in drop_remove_index]

#         event_threshold = 20  # küszöbérték

#         i = 0
#         while i < len(drop_list['onset']) - 1:
#             curr_end = drop_list['onset'][i] + drop_list['duration'][i]
#             next_start = drop_list['onset'][i + 1]
#             next_event_len = next_start - curr_end

#             if next_event_len < event_threshold:
#                 drop_list['duration'][i] += next_event_len + drop_list['duration'][i + 1]
                
#                 for key in drop_list.keys():
#                     del drop_list[key][i + 1]  # Az (i+1)-edik elemet töröljük
#             else:
#                 i += 1
#         alt_drop_thresX = drop_list['onset'][0]
#         alt_recovery_thresX = drop_list['onset'][0] + drop_list['duration'][0]

#         print(drop_list)

#         logging.info(f'alt drop threshold cross index: {alt_drop_thresX}')
#         logging.info(f'alt RECOVERY cross index: {alt_drop_thresX}') 

#         drop_thresX = drop_event['onset'][0]
#         drop_duration = drop_event['duration'][0]

#         logging.info(f'drop threshold cross index: {drop_thresX}')
#         dfmeta.update({
#             'drop_a_thres': drop_a_thres,
#             'drop_thresX': drop_thresX,
#             'drop_duration': drop_duration,
#         })
    
#     except (IndexError, KeyError) as e:
#         # Handle the case when no drop threshold is found
#         logging.warning(f'No drop threshold found or failed to calculate: {e}')
#         dfmeta.update({
#             'drop_a_thres': drop_a_thres,
#             'drop_thresX': None,  # Set to None when no event is found
#             'drop_duration': None
#         })

#     return dfmeta


#### Finding hypoxic drop min/max index (~start/end) [drop main]

In [7]:
def drop_main(df, dfmeta):
    dfmeta = drop_threshold(df, dfmeta)  

    drop_thresX = dfmeta['drop_thresX']

    # Check if drop_thresX is None before proceeding
    if drop_thresX is None:
        logging.warning('drop_thresX is None, skipping drop-related calculations.')
        # Update dfmeta with None values for drop-related fields
        dfmeta.update({
            'drop_search_window_left': None,
            'drop_search_window_right': None,
            'drop_max_value': None,
            'drop_max_index': None,
            'drop_min_value': None,
            'drop_min_index': None,
        })
        return dfmeta

    # Calculate search windows and perform drop calculations if drop_thresX is not None
    drop_search_window_left = drop_thresX - dfmeta['drop_search_bracket_left']
    logging.info(f'drop_search_window_left: {drop_search_window_left}')
    drop_search_window_right = drop_thresX + dfmeta['drop_search_bracket_right']
    logging.info(f'drop_search_window_right: {drop_search_window_right}')

    drop_search_df = df['spo2'].loc[drop_search_window_left:drop_search_window_right]

    drop_max_value = drop_search_df.max()
    drop_max_index = drop_search_df[drop_search_df == drop_max_value].index[-1]
    logging.info(f'drop_max_value and index: {drop_max_value}, {drop_max_index}')

    drop_min_value = drop_search_df.min()
    drop_min_index = drop_search_df[drop_search_df == drop_min_value].index[0]
    logging.info(f'drop_min_value and index: {drop_min_value}, {drop_min_index}')

    prehypoxia_end_dt = pd.to_datetime(df.iloc[drop_max_index]['time'])
    hypoxia_start_dt = pd.to_datetime(df.iloc[drop_min_index]['time'])


    # Update dfmeta with the calculated values
    dfmeta.update({
        'drop_search_window_left': drop_search_window_left,
        'drop_search_window_right': drop_search_window_right,
        'drop_max_value': drop_max_value,
        'drop_max_index': drop_max_index,
        'drop_min_value': drop_min_value,
        'drop_min_index': drop_min_index,
        'prehypoxia_end_dt': prehypoxia_end_dt,
        'hypoxia_start_dt': hypoxia_start_dt,
    })

    return dfmeta


### Hypoxic recovery detection and segmentation

#### Recovery adaptive threshold crossing

In [8]:
def recovery_threshold(df, dfmeta):
    recovery_a_thres = dfmeta['r_control'] * 100 + (1 - dfmeta['r_control']) * df['spo2_smooth'].min(skipna=True)
    logging.info(f'recovery adaptive threshold: {recovery_a_thres:.2f}')

    df_len = len(df) - 1  # -1 index correction (to avoid by one indexing error)

    try:
                
        # Attempt to find the recovery threshold crossing
        reverse_recovery_thresX = nk.events_find(
            df['spo2'][::-1],
            threshold=recovery_a_thres,
            threshold_keep='below',
            start_at=dfmeta['end_cutoff'],
            duration_min=dfmeta['min_duration']
        )['onset'][0]

        logging.info(f'reverse recovery threshold cross index: {reverse_recovery_thresX}')
        
        recovery_thresX = df_len - reverse_recovery_thresX
        logging.info(f'recovery threshold cross index: {recovery_thresX}')

        dfmeta.update({
            'recovery_a_thres': recovery_a_thres,
            'recovery_thresX': recovery_thresX,
        })

    except (IndexError, KeyError) as e:
        logging.warning(f'No recovery threshold found or failed to calculate: {e}')
        dfmeta.update({
            'recovery_a_thres': recovery_a_thres,
            'recovery_thresX': None,  # Set to None when no event is found
        })

    return dfmeta

In [9]:
# # Alt 2 recovery threshold
# def recovery_threshold(df, dfmeta):
#     recovery_a_thres = dfmeta['r_control'] * 100 + (1 - dfmeta['r_control']) * df['spo2_smooth'].min(skipna=True)
#     logging.info(f'recovery adaptive threshold: {recovery_a_thres:.2f}')

#     df_len = len(df) - 1  # -1 index correction (to avoid by one indexing error)

#     try:
#         # # Attempt to find the recovery threshold crossing
#         # reverse_recovery_thresX = nk.events_find(
#         #     df['spo2'][::-1],
#         #     threshold=recovery_a_thres,
#         #     threshold_keep='below',
#         #     start_at=dfmeta['end_cutoff'],
#         #     duration_min=dfmeta['min_duration']
#         # )['onset'][0]

#         # logging.info(f'reverse recovery threshold cross index: {reverse_recovery_thresX}')
        
#         recovery_thresX = dfmeta['drop_thresX'] + dfmeta['drop_duration']
#         logging.info(f'recovery threshold cross index: {recovery_thresX}')

#         dfmeta.update({
#             'recovery_a_thres': recovery_a_thres,
#             'recovery_thresX': recovery_thresX,
#         })

#     except (IndexError, KeyError) as e:
#         logging.warning(f'No recovery threshold found or failed to calculate: {e}')
#         dfmeta.update({
#             'recovery_a_thres': recovery_a_thres,
#             'recovery_thresX': None,  # Set to None when no event is found
#         })

#     return dfmeta

#### Finding hypoxic recovery min/max index (~start/end) [recovery main]

In [10]:
def recovery_main(df, dfmeta):
    dfmeta = recovery_threshold(df, dfmeta) 

    recovery_thresX = dfmeta['recovery_thresX']

    # Check if recovery_thresX is None before proceeding
    if recovery_thresX is None:
        logging.warning('recovery_thresX is None, skipping recovery-related calculations.')
        # Update dfmeta with None values for recovery-related fields
        dfmeta.update({
            'recovery_search_window_left': None,
            'recovery_search_window_right': None,
            'recovery_max_value': None,
            'recovery_max_index': None,
            'recovery_min_value': None,
            'recovery_min_index': None,
        })
        return dfmeta

    # Calculate search windows and perform recovery calculations if recovery_thresX is not None
    recovery_search_window_left = recovery_thresX - dfmeta['recovery_search_bracket_left']
    logging.info(f'recovery_search_window_left: {recovery_search_window_left}')
    recovery_search_window_right = recovery_thresX + dfmeta['recovery_search_bracket_right']
    logging.info(f'recovery_search_window_right: {recovery_search_window_right}')

    recovery_search_df = df['spo2'].loc[recovery_search_window_left:recovery_search_window_right]

    recovery_max_value = recovery_search_df.max()
    recovery_max_index = recovery_search_df[recovery_search_df == recovery_max_value].index[0]
    logging.info(f'recovery_max_value and index: {recovery_max_value}, {recovery_max_index}')

    recovery_min_value = recovery_search_df.min()
    recovery_min_index = recovery_search_df[recovery_search_df == recovery_min_value].index[-1]
    logging.info(f'recovery_min_value and index: {recovery_min_value}, {recovery_min_index}')

    
    hypoxia_end_dt = pd.to_datetime(df.iloc[recovery_min_index]['time'])
    posthypoxia_start_dt = pd.to_datetime(df.iloc[recovery_max_index]['time'])


    # Update dfmeta with the calculated values
    dfmeta.update({
        'recovery_search_window_left': recovery_search_window_left,
        'recovery_search_window_right': recovery_search_window_right,
        'recovery_max_value': recovery_max_value,
        'recovery_max_index': recovery_max_index,
        'recovery_min_value': recovery_min_value,
        'recovery_min_index': recovery_min_index,
        'hypoxia_end_dt': hypoxia_end_dt,  
        'posthypoxia_start_dt': posthypoxia_start_dt,      
    })

    return dfmeta


### Calculating basic physiologica indicies

#### Basic indices calculation v1

In [11]:
# def physio_indicies(df, dfmeta):
#     start_skip = 3 * 12  # Skipping the first 3 minutes of data
#     end_skip = 3 * 12  # Skipping the last 3 minutes of data
#     drop_start = dfmeta.get('drop_max_index')
#     drop_end = dfmeta.get('drop_min_index')
#     recovery_start = dfmeta.get('recovery_min_index')
#     recovery_end = dfmeta.get('recovery_max_index')

#     # If any of the critical indices are None, skip calculations and return an empty DataFrame
#     if any(x is None for x in [drop_start, drop_end, recovery_start, recovery_end]):
#         logging.warning("Missing drop or recovery indices; skipping statistical calculations.")
#         return pd.DataFrame()  # Return an empty DataFrame to avoid further errors

#     # Padding between phases
#     phase_padding_pre_hypoxic = 4 * 12  # Padding for normoxic phases
#     phase_padding_hypoxic = 2 * 12  # Padding for hypoxic phases
#     phase_padding_post_normoxia = 1 * 12  # Padding for normoxic phases

#     # Define data slices for each phase
#     df_pre_normoxia = df.iloc[start_skip:max(drop_start - phase_padding_pre_hypoxic, 0)]
#     df_hypoxia = df.iloc[min(drop_end + phase_padding_hypoxic, len(df)):max(recovery_start - phase_padding_hypoxic, 0)]
#     df_post_normoxia = df.iloc[min(recovery_end + phase_padding_post_normoxia, len(df)):-end_skip]

#     df_drop = df.iloc[drop_start:drop_end]
#     df_recovery = df.iloc[recovery_start:recovery_end]

#     # Dictionary of phase names and their corresponding data
#     phases = {
#         'pre_normoxia': df_pre_normoxia,
#         'hypoxia': df_hypoxia,
#         'post_normoxia': df_post_normoxia,
#         'drop': df_drop,
#         'recovery': df_recovery
#     }

#     # Parameters to calculate statistics for
#     columns_to_calc = ['spo2', 'hr_bpm', 'pi', 'qtc']
#     stats_list = []

#     # Loop through each phase and parameter to calculate statistics
#     for phase_name, phase_data in phases.items():
#         for col in columns_to_calc:
#             if col not in phase_data.columns:
#                 logging.warning(f"Column '{col}' not found in phase '{phase_name}'. Skipping this parameter.")
#                 continue  # Skip this parameter if it does not exist
#                 if col in ['drop', 'recovery']:
#                     stats_dict = {
#                     'subject_id': dfmeta.get('subject_id', 'Unknown'),
#                     'status': dfmeta.get('status', 'Unknown'),
#                     'phase': phase_name,
#                     'parameter': col,
#                     'count': phase_data[col].count(),
#                     'min': phase_data[col].min(),
#                     'max': phase_data[col].max(),
#                     'mean': phase_data[col].mean(),
#                     'median': phase_data[col].median(),
#                     'std': phase_data[col].std(),
#                     'slope':
#                 }
#             stats_list.append(stats_dict)
#             # Calculate statistics safely
#             stats_dict = {
#                 'subject_id': dfmeta.get('subject_id', 'Unknown'),
#                 'status': dfmeta.get('status', 'Unknown'),
#                 'phase': phase_name,
#                 'parameter': col,
#                 'count': phase_data[col].count(),
#                 'min': phase_data[col].min(),
#                 'max': phase_data[col].max(),
#                 'mean': phase_data[col].mean(),
#                 'median': phase_data[col].median(),
#                 'std': phase_data[col].std(),
#             }
#             stats_list.append(stats_dict)

#     # Convert the list of statistics dictionaries to a DataFrame
#     stats_df = pd.DataFrame(stats_list)
#     return stats_df


#### Basic indices calculation v2

In [12]:
from scipy.stats import linregress

def physio_indicies(df, dfmeta):
    start_skip = 3 * 12  # Skipping the first 3 minutes of data
    end_skip = 3 * 12  # Skipping the last 3 minutes of data
    drop_start = dfmeta.get('drop_max_index')
    drop_end = dfmeta.get('drop_min_index')
    recovery_start = dfmeta.get('recovery_min_index')
    recovery_end = dfmeta.get('recovery_max_index')

    # If any of the critical indices are None, skip calculations and return an empty DataFrame
    if any(x is None for x in [drop_start, drop_end, recovery_start, recovery_end]):
        logging.warning("Missing drop or recovery indices; skipping statistical calculations.")
        return pd.DataFrame()  # Return an empty DataFrame to avoid further errors

    # Padding between phases
    phase_padding_pre_hypoxic = 4 * 12  # Padding for normoxic phases
    phase_padding_hypoxic = 2 * 12  # Padding for hypoxic phases
    phase_padding_post_normoxia = 1 * 12  # Padding for normoxic phases

    # Define data slices for each phase
    df_pre_normoxia = df.iloc[start_skip:max(drop_start - phase_padding_pre_hypoxic, 0)]
    df_hypoxia = df.iloc[min(drop_end + phase_padding_hypoxic, len(df)):max(recovery_start - phase_padding_hypoxic, 0)]
    df_post_normoxia = df.iloc[min(recovery_end + phase_padding_post_normoxia, len(df)):-end_skip]

    df_drop = df.iloc[drop_start:drop_end]
    df_recovery = df.iloc[recovery_start:recovery_end]

    # Dictionary of phase names and their corresponding data
    phases = {
        'pre_normoxia': df_pre_normoxia,
        'hypoxia': df_hypoxia,
        'post_normoxia': df_post_normoxia,
        'drop': df_drop,
        'recovery': df_recovery
    }

    # Parameters to calculate statistics for
    columns_to_calc = ['spo2', 'hr_bpm', 'pi', 'qtc']
    stats_list = []

    # Loop through each phase and parameter to calculate statistics
    for phase_name, phase_data in phases.items():
        for col in columns_to_calc:
            if col not in phase_data.columns:
                logging.warning(f"Column '{col}' not found in phase '{phase_name}'. Skipping this parameter.")
                continue  # Skip this parameter if it does not exist

            # Calculate basic statistics
            stats_dict = {
                'subject_id': dfmeta.get('subject_id', 'Unknown'),
                'status': dfmeta.get('status', 'Unknown'),
                'phase': phase_name,
                'parameter': col,
                'count': phase_data[col].count(),
                'min': phase_data[col].min(),
                'max': phase_data[col].max(),
                'mean': phase_data[col].mean(),
                'median': phase_data[col].median(),
                'std': phase_data[col].std(),
            }
            
            # Calculate the slope if there is enough data
            if phase_data[col].count() > 1:
                # Perform linear regression to get the slope
                time_indices = phase_data.index.to_series().reset_index(drop=True)
                slope, _, _, _, _ = linregress(time_indices, phase_data[col])
                stats_dict['slope'] = slope
            else:
                stats_dict['slope'] = None  # Insufficient data for slope calculation
            
            stats_list.append(stats_dict)

    # Convert the list of statistics dictionaries to a DataFrame
    stats_df = pd.DataFrame(stats_list)
    return stats_df


### Plotting functions

#### Basic indices plotting

In [13]:
def plot_statistics(stats_df, dfmeta):
    # Ensure stats_df is not empty and contains the 'parameter' column
    if stats_df.empty:
        logging.warning("The stats_df is empty. Skipping plotting.")
        return

    if 'parameter' not in stats_df.columns:
        logging.error("'parameter' column not found in stats_df. Unable to plot.")
        return

    # Define the parameters and statistics to plot
    parameters = stats_df['parameter'].unique()
    stats_to_plot = ['mean']  # List of statistics to plot, can be expanded

    # Define a lookup table for parameter names
    parameter_titles = {
        'spo2': 'SpO2 (%)',
        'hr_bpm': 'Heart Rate (bpm)',
        'pi': 'Perfusion Index',
        'qtc': 'QTc Interval (ms)',
        # Add more mappings as needed
    }

    # Create subplots, rows for each statistic and columns for each parameter
    fig = make_subplots(
        rows=len(stats_to_plot), 
        cols=len(parameters), 
        shared_yaxes=False, 
        subplot_titles=[parameter_titles.get(param, param) + f" - {stat}" for stat in stats_to_plot for param in parameters]
    )

    # Iterate over the stats and parameters to add the traces
    for row, stat in enumerate(stats_to_plot, start=1):
        for col, parameter in enumerate(parameters, start=1):
            # Filter the DataFrame for the current parameter
            filtered_df = stats_df[stats_df['parameter'] == parameter]

            # Check if the filtered DataFrame has data for the current parameter and statistic
            if filtered_df.empty:
                logging.warning(f"No data found for parameter '{parameter}' in stats_df. Skipping.")
                continue

            # Check if the current statistic exists in the DataFrame
            if stat not in filtered_df.columns:
                logging.warning(f"Statistic '{stat}' not found for parameter '{parameter}'. Skipping.")
                continue
            
            # Add a bar trace for the current parameter's statistic value across phases
            fig.add_trace(go.Bar(
                x=filtered_df['phase'],
                y=filtered_df[stat],
                name=f"{parameter} - {stat}",
                text=filtered_df[stat].round(2),  # Display the rounded values as text
                textposition='auto'
            ), row=row, col=col)

    # Update layout for better visualization
    fig.update_layout(
        title=f"{dfmeta.get('subject_id', 'Unknown')} {dfmeta.get('status', 'Unknown')}",
        xaxis_title='Phase',
        yaxis_title='Value',
        barmode='group',  # Group bars by phase within each subplot
        template='plotly_white',
        height=400 * len(stats_to_plot),  # Adjust height based on the number of rows
        legend_title='Parameters',
        showlegend=False  # Hide legend since parameter names are in subplot titles
    )

    # Show the combined plot
    fig.show()
    return fig


#### Drop/recovery point plotting

In [14]:

def dfmeta_plotter(df, dfmeta):
    indices_to_plot = [
        {'key_name': 'drop_treshX', 'index': dfmeta['drop_thresX'], 'color': 'red', 'marker': 'x'},
        {'key_name': 'drop_max_index', 'index': dfmeta['drop_max_index'], 'color': 'orange', 'marker': 'circle'},
        {'key_name': 'drop_min_index', 'index': dfmeta['drop_min_index'], 'color': 'orange', 'marker': 'circle'},
        {'key_name': 'recovery_thresX', 'index': dfmeta['recovery_thresX'], 'color': 'green', 'marker': 'x'},
        {'key_name': 'recovery_max_index', 'index': dfmeta['recovery_max_index'], 'color': 'limegreen', 'marker': 'circle'},
        {'key_name': 'recovery_min_index', 'index': dfmeta['recovery_min_index'], 'color': 'limegreen', 'marker': 'circle'},
    ]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['time'], y=df['spo2'], mode='lines', name='SpO2(%)'))

    for event in indices_to_plot:
        key_name = event['key_name']
        index = event['index']
        color = event['color']
        marker = event['marker']
        
        fig.add_trace(go.Scatter(
            x=[df['time'][index]], 
            y=[df['spo2'][index]],
            mode='markers+text',
            marker=dict(size=10, symbol=marker, color=color),  #  predefined marker style and color
            name='{}'.format(key_name),  # key name for legend
            text=['{}'.format(index)],  # Display index
            textposition='top center',  # Position of the text
        ))

    fig.update_layout(
        title=f"{dfmeta.get('subject_id', 'Unknown')} {dfmeta.get('status', 'Unknown')}",
        xaxis_title='Time (s)',
        yaxis_title='SpO2 (%)',
        legend_title='Events',
        template='plotly_white',
    )

    # Show the plot
    fig.show()
    return fig


#### Drop/recovery point and PHYSIO plotting

In [15]:
def dfmeta_plotter_multi(df, dfmeta):
    # Define the physiological parameters to plot
    parameters = ['spo2']  # Only 'spo2' since we're adding spo2_smooth to the same plot
    parameter_titles = {
        'spo2': 'SpO2 (%)',
        'spo2_smooth': 'Smoothed SpO2 (%)',  # Title for the smoothed data
        'hr_bpm': 'Heart Rate (bpm)',
        'pi': 'Perfusion Index',
        'qtc': 'QTc Interval (ms)'
    }
    
    # Define the events and their properties
    indices_to_plot = [
        {'key_name': 'drop_thresX', 'index': dfmeta['drop_thresX'], 'color': 'red', 'marker': 'x'},
        {'key_name': 'drop_max_index', 'index': dfmeta['drop_max_index'], 'color': 'orange', 'marker': 'circle'},
        {'key_name': 'drop_min_index', 'index': dfmeta['drop_min_index'], 'color': 'orange', 'marker': 'circle'},
        {'key_name': 'recovery_thresX', 'index': dfmeta['recovery_thresX'], 'color': 'green', 'marker': 'x'},
        {'key_name': 'recovery_max_index', 'index': dfmeta['recovery_max_index'], 'color': 'limegreen', 'marker': 'circle'},
        {'key_name': 'recovery_min_index', 'index': dfmeta['recovery_min_index'], 'color': 'limegreen', 'marker': 'circle'},
    ]

    # Create subplots with shared x-axis
    fig = make_subplots(
        rows=1, cols=1,  # One subplot for both traces
        shared_xaxes=True, 
        vertical_spacing=0.1,
        subplot_titles=[parameter_titles['spo2']]  # Only title for SpO2 plot
    )

    # Plot original spo2 data
    fig.add_trace(
        go.Scatter(
            x=df['time'], 
            y=df['spo2'], 
            mode='lines', 
            name='SpO2',
            opacity=1,
            line=dict(color='#636EFA'),
            showlegend=True  # Show legend for original SpO2
        )
    )

    # Plot smoothed spo2 data
    fig.add_trace(
        go.Scatter(
            x=df['time'], 
            y=df['spo2_smooth'], 
            opacity=0.8,
            mode='lines', 
            name='Smoothed SpO2',
            line=dict(color='orange'),  # Set the line color to navy blue
            showlegend=True  # Show legend for smoothed SpO2
        )
    )

    # Add event markers for each subplot, only if the index is not None
    for event in indices_to_plot:
        key_name = event['key_name']
        index = event['index']
        color = event['color']
        marker = event['marker']
        
        # Check if the index is not None and within the dataframe's index range
        if index is not None and index in df.index:
            fig.add_trace(
                go.Scatter(
                    x=[df['time'][index]], 
                    y=[df['spo2'][index]],  # Assuming markers are based on original SpO2
                    mode='markers+text',
                    marker=dict(size=10, symbol=marker, color=color),  # Predefined marker style and color
                    name='{} (SpO2)'.format(key_name),  # Key name for legend
                    text=[key_name],  # Display the event name
                    textposition='top center',  # Position of the text
                    showlegend=True  # Show legend for event markers
                )
            )

    # Update layout for the overall figure
    fig.update_layout(
        title=f"{dfmeta.get('subject_id', 'Unknown')} {dfmeta.get('status', 'Unknown')}",
        height=400,  # Fixed height since we only have one subplot
        xaxis_title='Time (s)',
        yaxis_title='Oxygen Saturation (%)',
        legend_title='Data Type',
        template='plotly_white',
    )

    # Show the plot
    fig.show()
    return fig


#### Saving multiple plot list to html

In [16]:
def save_plots_as_html(plots, output_file='combined_plots.html'):
    # Start with an HTML template
    html_content = '<html><head><title>Combined Plots</title></head><body>'

    # Check for NoneType entries and filter them out
    valid_plots = [fig for fig in plots if fig is not None]
    
    # Log the number of valid plots
    logging.info(f'Number of valid plots: {len(valid_plots)}')
    
    if not valid_plots:
        logging.warning('No valid plots to save.')
        return  # Early exit if no valid plots

    # Add each plot's HTML to the template
    for idx, fig in enumerate(valid_plots):
        try:
            html_content += pio.to_html(fig, full_html=False, include_plotlyjs='cdn')
        except Exception as e:
            logging.error(f"Failed to convert plot to HTML: {e}")
            continue  # Skip this plot if there's an error
        
        # Add a horizontal line after each plot except the last one
        if idx < len(valid_plots) - 1:
            html_content += '<hr style="border: 1px solid #000; margin: 20px 0;">'  # Horizontal line with styling

    # Close the HTML content
    html_content += '</body></html>'

    # Save the HTML content to a file
    with open(output_file, 'w') as f:
        f.write(html_content)

    logging.info(f'Saved plots to {output_file}')


### Misc functions

#### Renaming test folders

In [17]:
import os
import shutil
import pandas as pd

def rename_and_copy_folders(src_directory, dest_directory, log_file):
    # Ensure the destination directory exists
    os.makedirs(dest_directory, exist_ok=True)

    # Mapping to track IDs for base names and log data
    base_name_to_id = {}
    current_id = 1
    log_data = []

    # Iterate over each folder in the source directory
    for folder_name in os.listdir(src_directory):
        src_folder_path = os.path.join(src_directory, folder_name)

        # Ensure it's a folder
        if os.path.isdir(src_folder_path):
            # Extract base name and suffix
            if folder_name.endswith('u') or folder_name.endswith('e'):
                base_name = folder_name[:-1]  # Remove last character ('u' or 'e')
                suffix = folder_name[-1]  # Last character ('u' or 'e')

                # Assign an ID to the base name if not already mapped
                if base_name not in base_name_to_id:
                    base_name_to_id[base_name] = current_id
                    current_id += 1

                # Get the assigned ID
                assigned_id = base_name_to_id[base_name]

                # Determine the new folder name
                if suffix == 'u':
                    new_folder_name = f"after_ID_{assigned_id:02}"
                elif suffix == 'e':
                    new_folder_name = f"before_ID_{assigned_id:02}"
                else:
                    continue  # Skip if unexpected suffix

                # Define new destination folder path
                dest_folder_path = os.path.join(dest_directory, new_folder_name)
                
                # Copy the folder to the new location with the new name
                shutil.copytree(src_folder_path, dest_folder_path)

                # Check if 'ParameterData' subfolder exists
                parameter_data_path = os.path.join(dest_folder_path, "ParameterData")
                if os.path.exists(parameter_data_path) and os.path.isdir(parameter_data_path):
                    # Find the CSV file in the 'ParameterData' subfolder
                    for file_name in os.listdir(parameter_data_path):
                        if file_name.endswith('.csv'):
                            old_csv_path = os.path.join(parameter_data_path, file_name)
                            new_csv_name = f"{new_folder_name}.csv"
                            new_csv_path = os.path.join(parameter_data_path, new_csv_name)
                            
                            # Rename the CSV file
                            os.rename(old_csv_path, new_csv_path)
                            print(f"Renamed CSV: {file_name} -> {new_csv_name}")

                # Add to log
                log_data.append({"Original Name": folder_name, "New Name": new_folder_name})
                print(f"Copied and processed: {folder_name} -> {new_folder_name}")

    # Save the log data to an Excel file
    log_df = pd.DataFrame(log_data)
    log_df.to_excel(log_file, index=False)
    print(f"Log saved to: {log_file}")


### ECG process and HRV Calculation

#### Searching for ECG waveform file

In [18]:
def find_highest_priority_ecg_csv(folder_path, dfmeta, min_file_size=10000):
    """
    Find the highest-priority ECG CSV file in the 'WaveformData' subfolder of the given folder path,
    extract the sample rate from the corresponding description file, and add it to the dfmeta dictionary.
    Skips any ECG CSV files that are smaller than min_file_size (in bytes).
    
    Args:
        folder_path (str): The base folder path to search for the 'WaveformData' subfolder.
        dfmeta (dict): The dictionary that contains metadata.
        min_file_size (int): The minimum file size in bytes. Files smaller than this will be skipped.
    
    Returns:
        tuple: The path to the highest-priority ECG CSV file, and the updated dfmeta dictionary.
    """
    # Define the preference list
    ecg_preferences = ["V5", "V6", "V4", "II", "I", "AVF", "III", "V3", "AVL", "V2", "V1", "AVR"]

    # Construct the path to the 'WaveformData' subfolder
    waveform_data_path = os.path.join(folder_path, "WaveformData")

    # Check if 'WaveformData' exists and is a directory
    if not os.path.exists(waveform_data_path) or not os.path.isdir(waveform_data_path):
        logging.warning(f"'WaveformData' subfolder not found in {folder_path}.")
        return None, dfmeta

    # Collect all ECG CSV files matching the preference list
    ecg_files = []
    for file_name in os.listdir(waveform_data_path):
        if file_name.startswith("ECG_") and file_name.endswith(".csv"):
            # Extract the ECG type (e.g., "AVF", "II") from the filename (before the hyphen)
            ecg_type = file_name.split("-")[0].split("_")[1]  # Get the part after "ECG_" and before "-"
            
            if ecg_type in ecg_preferences:
                ecg_files.append((ecg_type, os.path.join(waveform_data_path, file_name)))

    # Sort the files by the preference order
    ecg_files.sort(key=lambda x: ecg_preferences.index(x[0]))

    # Iterate through the sorted ECG files and check their size
    for ecg_type, ecg_path in ecg_files:
        # Check if the file size is greater than the minimum required size
        if os.path.getsize(ecg_path) >= min_file_size:
            logging.info(f"Found valid ECG file: {ecg_path}")
            
            # Look for the description file (e.g., ECG_AVF_Description.txt)
            description_file = os.path.join(waveform_data_path, f"ECG_{ecg_type}_Description.txt")
            
            if os.path.exists(description_file):
                # Open the description file and extract the sample rate from the first line
                with open(description_file, 'r') as desc_file:
                    first_line = desc_file.readline().strip()
                    if first_line.startswith("SampleRate="):
                        sample_rate = first_line.split("=")[1]
                        # Add the sample rate to dfmeta
                        dfmeta[f'sample_rate'] = int(sample_rate)
                        logging.info(f"Sample rate for ECG_{ecg_type}: {sample_rate}")
                    else:
                        logging.warning(f"Sample rate not found in description file: {description_file}")
            
            return ecg_path, dfmeta

    # If no valid ECG file found
    logging.warning(f"No valid ECG files found (or all were too small) in {folder_path}.")
    return None, dfmeta


#### ECG data parsing

In [19]:
import pandas as pd
import logging

def parse_ecg_csv(ecg_path, sampling_rate=None):
    # Read the CSV file without headers
    df = pd.read_csv(ecg_path, header=None)
    
    # Calculate the number of columns
    num_columns = df.shape[1]  # Total number of columns in the dataframe
    num_ecg_columns = num_columns - 1  # Subtract 1 for the 'datetime' column

    # Check if the number of ECG columns matches the sampling rate (if provided)
    if sampling_rate is not None and num_ecg_columns != sampling_rate:
        logging.warning(f"ECG file at {ecg_path} has {num_ecg_columns} ECG columns, but the sampling rate is {sampling_rate}. This may indicate a mismatch.")

    # Ensure that there is at least one ECG column (besides datetime)
    if num_ecg_columns < 1:
        raise ValueError(f"ECG file at {ecg_path} has an invalid number of columns: {num_columns}. Expected at least 2 columns (datetime + ECG data).")
    
    # Generate column names for datetime and ECG data columns
    column_names = ['datetime'] + [f'ecg_{i}' for i in range(num_ecg_columns)]
    
    # Rename the columns
    df.columns = column_names
    
    # Convert the 'datetime' column to datetime objects
    df['datetime'] = pd.to_datetime(df['datetime'])
    
    # Melt the dataframe to make it long-form (timestamp, sample number, value)
    ecg_df = df.melt(id_vars=['datetime'], 
                        var_name='sample_number', 
                        value_name='ecg_value')
    
    # Extract the ECG sample number from the 'sample_number' column
    ecg_df['sample_number'] = ecg_df['sample_number'].str.extract(r'(\d+)').astype(int)
    
    # Sort by datetime and sample_number for a cleaner output
    ecg_df = ecg_df.sort_values(by=['datetime', 'sample_number']).reset_index(drop=True)
    
    return ecg_df


#### HRV calc function

In [20]:
import gc

def hrv_calculation(dfmeta, ecg_path, hrv_output=None):
    sampling_rate = int(dfmeta['sample_rate'])
    hypoxia_start_dt = dfmeta['hypoxia_start_dt']
    hypoxia_end_dt = dfmeta['hypoxia_end_dt']

    # Define the start time as 600 seconds before the end time
    start_time_hypox = hypoxia_end_dt - pd.Timedelta(seconds=600)

    # Parse original ECG data
    ecg_df_full = parse_ecg_csv(ecg_path, sampling_rate)

    # Create HYPOXIA sample from ecg_df_full based on the time range
    mask_hypox = (ecg_df_full['datetime'] >= start_time_hypox) & (ecg_df_full['datetime'] <= hypoxia_end_dt)
    ecg_df_hypox = ecg_df_full[mask_hypox]['ecg_value']

    # Calculate the hypoxic duration in seconds
    hypoxic_dt_length = int((hypoxia_end_dt - hypoxia_start_dt).total_seconds())
    dfmeta['hypoxic_dt_length'] = hypoxic_dt_length

    # Create PRE HYPOX
    mask_pre_hypox = (ecg_df_full['datetime'] >= (hypoxia_start_dt - pd.Timedelta(seconds=600))) & (ecg_df_full['datetime'] <= hypoxia_start_dt)
    ecg_df_prehypox = ecg_df_full[mask_pre_hypox]['ecg_value']
    prehypox_dt_length = int((hypoxia_start_dt - hypoxia_start_dt).total_seconds())
    dfmeta['prehypoxic_dt_length'] = prehypox_dt_length

    # Create POST HYPOX
    mask_post_hypox = (ecg_df_full['datetime'] >= hypoxia_end_dt) & (ecg_df_full['datetime'] <= (hypoxia_end_dt + pd.Timedelta(seconds=600)))
    ecg_df_posthypox = ecg_df_full[mask_post_hypox]['ecg_value']
    posthypox_dt_length = int((hypoxia_end_dt - hypoxia_start_dt).total_seconds())
    dfmeta['posthypoxic_dt_length'] = posthypox_dt_length    

    # Initialize hrv_output if it is None
    if hrv_output is None:
        hrv_output = pd.DataFrame()
    
    hrv_meta = pd.DataFrame()
    
        
    # Add metadata to hrv_indices
    hrv_meta['subject_id'] = dfmeta['subject_id']
    hrv_meta['status'] = dfmeta['status']
    hrv_meta['sample_rate'] = dfmeta['sample_rate']
    hrv_meta['hypoxic_total_seconds'] = hypoxic_dt_length
    hrv_meta['prehypoxic_total_seconds'] = prehypox_dt_length
    hrv_meta['posthypoxic_total_seconds'] = posthypox_dt_length

    # Concatenate the HRV indices to the output DataFrame
    hrv_output = pd.concat([hrv_output, hrv_meta], ignore_index=True)    


    # Check if ecg_df_hypox is empty and handle it
    if ecg_df_hypox.empty:
        logging.warning(f"No ECG data found within the specified time range: {start_time_hypox} to {hypoxia_end_dt} for {ecg_path}. Skipping this file.")
        return hrv_output, dfmeta  # Return the current output and metadata without processing this file.

    # Perform ECG processing and HRV calculation with Neurokit
    ecg_df_hypox, r_peaks_dict_hypox = nk.ecg_process(ecg_signal=ecg_df_hypox, sampling_rate=sampling_rate)
    hrv_indices_hypox = nk.hrv_time(r_peaks_dict_hypox['ECG_R_Peaks'], sampling_rate=sampling_rate, show=False)

    
    # Concatenate the HRV indices to the output DataFrame
    hrv_output = pd.concat([hrv_output, hrv_indices_hypox], ignore_index=True)    

    if ecg_df_prehypox.empty:
        logging.warning(f"No ECG data found within the specified PRE time range: {start_time_hypox} to {hypoxia_end_dt} for {ecg_path}. Skipping this file.")
        return hrv_output, dfmeta  # Return the current output and metadata without processing this file.    

    # Perform ECG processing and HRV calculation with Neurokit
    ecg_df_prehypox, r_peaks_dict_prehypox = nk.ecg_process(ecg_signal=ecg_df_prehypox, sampling_rate=sampling_rate)
    hrv_indices_prehypox = nk.hrv_time(r_peaks_dict_hypox['ECG_R_Peaks'], sampling_rate=sampling_rate, show=False)

    # Concatenate the HRV indices to the output DataFrame
    hrv_output = pd.concat([hrv_output, hrv_indices_prehypox], ignore_index=True) 

    if ecg_df_posthypox.empty:
        logging.warning(f"No ECG data found within the specified POST time range: {start_time_hypox} to {hypoxia_end_dt} for {ecg_path}. Skipping this file.")
        return hrv_output, dfmeta  # Return the current output and metadata without processing this file.        

    # Perform ECG processing and HRV calculation with Neurokit
    ecg_df_posthypox, r_peaks_dict_posthypox = nk.ecg_process(ecg_signal=ecg_df_posthypox, sampling_rate=sampling_rate)
    hrv_indices_posthypox = nk.hrv_time(r_peaks_dict_hypox['ECG_R_Peaks'], sampling_rate=sampling_rate, show=False)        

    # Concatenate the HRV indices to the output DataFrame
    hrv_output = pd.concat([hrv_output, hrv_indices_posthypox], ignore_index=True) 
   

    return hrv_output, dfmeta


#### HRV calc function v2

In [21]:
def hrv_calculation(dfmeta, ecg_path, hrv_output=None):
    sampling_rate = int(dfmeta['sample_rate'])
    hypoxia_start_dt = dfmeta['hypoxia_start_dt']
    hypoxia_end_dt = dfmeta['hypoxia_end_dt']

    # Parse original ECG data
    try:
        ecg_df_full = parse_ecg_csv(ecg_path, sampling_rate)
    except Exception as e:
        logging.error(f"Failed to parse ECG data: {e}")
        return hrv_output, dfmeta

    # Initialize hrv_output if it is None
    if hrv_output is None:
        hrv_output = pd.DataFrame()

    # Calculate durations for prehypoxic and posthypoxic periods
    first_datetime_index = ecg_df_full['datetime'].iloc[0]
    last_datetime_index = ecg_df_full['datetime'].iloc[-1]

    # Ensure datetime values exist in the data
    if pd.isnull(first_datetime_index) or pd.isnull(last_datetime_index):
        logging.error(f"Invalid datetime indices in ECG data.")
        return hrv_output, dfmeta

    prehypox_dt_length = int((hypoxia_start_dt - first_datetime_index).total_seconds())
    posthypox_dt_length = int((last_datetime_index - hypoxia_end_dt).total_seconds())
    hypoxic_dt_length = int((hypoxia_end_dt - hypoxia_start_dt).total_seconds())

    # Add these to dfmeta
    dfmeta['hypoxic_dt_length'] = hypoxic_dt_length
    dfmeta['prehypox_dt_length'] = prehypox_dt_length
    dfmeta['posthypox_dt_length'] = posthypox_dt_length

    # Add metadata to hrv_output
    metadata = {
        "subject_id": dfmeta['subject_id'],
        "status": dfmeta['status'],
        "sample_rate": dfmeta['sample_rate'],
        "hypoxic_total_seconds": hypoxic_dt_length,
        "prehypoxic_total_seconds": prehypox_dt_length,
        "posthypoxic_total_seconds": posthypox_dt_length,
    }

    hrv_output = pd.concat([hrv_output, pd.DataFrame([metadata])], ignore_index=True)

    # Create HYPOX, PRE, POST masks and HRV processing
    def process_segment(mask, segment_name):
        ecg_segment = ecg_df_full[mask]['ecg_value']
        if ecg_segment.empty:
            logging.warning(f"No ECG data in {segment_name} range. Skipping.")
            return None
        ecg_df, r_peaks = nk.ecg_process(ecg_signal=ecg_segment, sampling_rate=sampling_rate)
        return nk.hrv(r_peaks['ECG_R_Peaks'], sampling_rate=sampling_rate, show=False)

    # Define time masks for hypox, prehypox, and posthypox
    start_time_hypox = hypoxia_end_dt - pd.Timedelta(seconds=600)
    hrv_indices_hypox = process_segment(
        (ecg_df_full['datetime'] >= start_time_hypox) & (ecg_df_full['datetime'] <= hypoxia_end_dt),
        "HYPOX",
    )
    hrv_indices_prehypox = process_segment(
        (ecg_df_full['datetime'] >= first_datetime_index) & (ecg_df_full['datetime'] < hypoxia_start_dt),
        "PRE-HYPOX",
    )
    hrv_indices_posthypox = process_segment(
        (ecg_df_full['datetime'] > hypoxia_end_dt) & (ecg_df_full['datetime'] <= last_datetime_index),
        "POST-HYPOX",
    )

    # Concatenate HRV indices into hrv_output
    for hrv_indices in [hrv_indices_hypox, hrv_indices_prehypox, hrv_indices_posthypox]:
        if hrv_indices is not None:
            hrv_output = pd.concat([hrv_output, hrv_indices], ignore_index=True)

    return hrv_output, dfmeta


#### HRV calculation v3

In [22]:
def hrv_calculation(dfmeta, ecg_path, hrv_output=None):
    """
    Perform HRV calculation and ensure metadata, including 'phase', is included in each row of the output.
    Handles cases where segments are too short for meaningful analysis.
    """
    sampling_rate = int(dfmeta['sample_rate'])
    hypoxia_start_dt = dfmeta['hypoxia_start_dt']
    hypoxia_end_dt = dfmeta['hypoxia_end_dt']

    # Parse original ECG data
    try:
        ecg_df_full = parse_ecg_csv(ecg_path, sampling_rate)
    except Exception as e:
        logging.error(f"Failed to parse ECG data: {e}")
        return hrv_output, dfmeta

    # Initialize hrv_output if it is None
    if hrv_output is None:
        hrv_output = pd.DataFrame()

    # Get the first and last datetime indices
    first_datetime_index = ecg_df_full['datetime'].iloc[0]
    last_datetime_index = ecg_df_full['datetime'].iloc[-1]

    if pd.isnull(first_datetime_index) or pd.isnull(last_datetime_index):
        logging.error(f"Invalid datetime indices in ECG data.")
        return hrv_output, dfmeta

    # Pre-segment (start 1 minute after the first index, end closest to hypoxia_start_dt, max 600 seconds long)
    pre_start = first_datetime_index + pd.Timedelta(seconds=60)
    pre_end = min(hypoxia_start_dt, pre_start + pd.Timedelta(seconds=600))
    pre_segment_duration = max(0, int((pre_end - pre_start).total_seconds()))

    # Post-segment (start closest to hypoxia_end_dt, end 1 minute before the last index, max 600 seconds long)
    post_start = max(hypoxia_end_dt, last_datetime_index - pd.Timedelta(seconds=600) - pd.Timedelta(seconds=60))
    post_end = last_datetime_index - pd.Timedelta(seconds=60)
    post_segment_duration = max(0, int((post_end - post_start).total_seconds()))

    # Calculate durations for metadata
    hypoxic_dt_length = int((hypoxia_end_dt - hypoxia_start_dt).total_seconds())
    prehypox_dt_length = max(0, pre_segment_duration)
    posthypox_dt_length = max(0, post_segment_duration)

    # Add metadata to dfmeta
    dfmeta['hypoxic_dt_length'] = hypoxic_dt_length
    dfmeta['prehypox_dt_length'] = prehypox_dt_length
    dfmeta['posthypox_dt_length'] = posthypox_dt_length
    dfmeta['premask length (sec)'] = pre_segment_duration
    dfmeta['postmask length (sec)'] = post_segment_duration    

    # Add common metadata to all rows
    metadata = {
        "subject_id": dfmeta['subject_id'],
        "status": dfmeta['status'],
        "sample_rate": dfmeta['sample_rate'],
        "hypoxic_total_seconds": hypoxic_dt_length,
        "prehypoxic_total_seconds": prehypox_dt_length,
        "posthypoxic_total_seconds": posthypox_dt_length,
        'premask length (sec)': pre_segment_duration,
        'postmask length (sec)': post_segment_duration,
    }

    # Create a helper function to process segments
    def process_segment(mask, segment_name):
        ecg_segment = ecg_df_full[mask]['ecg_value']
        segment_length = len(ecg_segment) / sampling_rate  # Calculate length in seconds
        if segment_length < 30:
            logging.warning(f"{segment_name} segment is too short ({segment_length:.2f} seconds). Skipping HRV analysis.")
            return None
        ecg_df, r_peaks = nk.ecg_process(ecg_signal=ecg_segment, sampling_rate=sampling_rate)
        hrv_indices = nk.hrv(r_peaks['ECG_R_Peaks'], sampling_rate=sampling_rate, show=False)
        
        # Add metadata and phase information
        for col, val in metadata.items():
            hrv_indices[col] = val  # Add metadata to each row
        hrv_indices['phase'] = segment_name  # Add phase information
        return hrv_indices

    # Masks for each segment
    mask_hypox = (ecg_df_full['datetime'] >= (hypoxia_end_dt - pd.Timedelta(seconds=600))) & (ecg_df_full['datetime'] <= hypoxia_end_dt)
    mask_prehypox = (ecg_df_full['datetime'] >= pre_start) & (ecg_df_full['datetime'] <= pre_end)
    mask_posthypox = (ecg_df_full['datetime'] >= post_start) & (ecg_df_full['datetime'] <= post_end)

    # Process each segment
    hrv_indices_hypox = process_segment(mask_hypox, "HYPOX")
    hrv_indices_prehypox = process_segment(mask_prehypox, "PRE-HYPOX")
    hrv_indices_posthypox = process_segment(mask_posthypox, "POST-HYPOX")

    # Concatenate HRV indices into hrv_output
    for hrv_indices in [hrv_indices_prehypox, hrv_indices_hypox, hrv_indices_posthypox]:
        if hrv_indices is not None:
            hrv_output = pd.concat([hrv_output, hrv_indices], ignore_index=True)

    return hrv_output, dfmeta


### Hypoxic segmenter main function

In [23]:
# def segmenter_main(filename, dfmeta, segmenter_main_output_df, ecg_path, hrv_results):
#     # Load file from filename/path
#     df, dfmeta = load_file(filename, dfmeta)

#     # Prepare data
#     df, dfmeta = data_prep(df, dfmeta)

#     # Find hypoxic drop related features
#     dfmeta = drop_main(df, dfmeta)

#     # Find hypoxic recovery related features
#     dfmeta = recovery_main(df, dfmeta)

#     # Store all plots in a list
#     plots = []

#     # Plot drop/recovery points with physiological data and add to plots list
#     plot1 = dfmeta_plotter_multi(df, dfmeta)
#     if plot1 is not None:  # Check if plot is valid
#         plots.append(plot1)
#     else:
#         logging.warning(f'Plot1 returned None for file: {filename}')

#     # Calculate statistics for selected physiological parameters
#     stats_df = physio_indicies(df, dfmeta)
#     logging.info(f'stats_df:\n{stats_df}')
#     logging.info(f'dfmeta:\n{dfmeta}')

#     # Plot statistics from stats_df and add to plots list
#     #plot2 = plot_statistics(stats_df, dfmeta)
#     #if plot2 is not None:  # Check if plot is valid
#     #    plots.append(plot2)
#     #else:
#     #    logging.warning(f'Plot2 returned None for file: {filename}')

#     # Log the number of plots generated
#     logging.info(f'Number of plots generated for {filename}: {len(plots)}')

#     # Concatenate stats_df with the output DataFrame
#     segmenter_main_output_df = pd.concat([segmenter_main_output_df, stats_df], axis=0, ignore_index=True)

#     ## HRV CALCULATION ##
#     # create function level hrv_result dataframe for intermediate data storage
#     hrv_loop_results = pd.DataFrame()

#     # perform HRV calculation
#     hrv_loop_results, dfmeta = hrv_calculation(dfmeta, ecg_path, hrv_results)

#     #add results to main hrv database
#     hrv_results = pd.concat([hrv_results, hrv_loop_results], axis=0, ignore_index=True)


#     return df, dfmeta, segmenter_main_output_df, plots, hrv_results



In [29]:
def segmenter_main(filename, dfmeta, segmenter_main_output_df, ecg_path, hrv_results):
    # Load file from filename/path
    df, dfmeta = load_file(filename, dfmeta)

    # Prepare data
    df, dfmeta = data_prep(df, dfmeta)

    # Find hypoxic drop related features
    dfmeta = drop_main(df, dfmeta)

    # Find hypoxic recovery related features
    dfmeta = recovery_main(df, dfmeta)

    # Store all plots in a list
    plots = []

    # Plot drop/recovery points with physiological data and add to plots list
    plot1 = dfmeta_plotter_multi(df, dfmeta)
    if plot1 is not None:  # Check if plot is valid
        plots.append(plot1)
    else:
        logging.warning(f'Plot1 returned None for file: {filename}')

    # Calculate statistics for selected physiological parameters
    stats_df = physio_indicies(df, dfmeta)
    logging.info(f'stats_df:\n{stats_df}')
    logging.info(f'dfmeta:\n{dfmeta}')

    # Log the number of plots generated
    logging.info(f'Number of plots generated for {filename}: {len(plots)}')

    # Append stats_df rows to the output DataFrame
    if not stats_df.empty:
        segmenter_main_output_df = pd.concat([segmenter_main_output_df, stats_df], ignore_index=True)
        # After concatenation, remove duplicates based on all columns
        segmenter_main_output_df = segmenter_main_output_df.drop_duplicates()

    ## HRV CALCULATION ##
    # create function-level hrv_result dataframe for intermediate data storage
    hrv_loop_results = pd.DataFrame()

    # Perform HRV calculation
    #hrv_loop_results, dfmeta = hrv_calculation(dfmeta, ecg_path, hrv_results)

    # Append results to main hrv_results DataFrame
    if not hrv_loop_results.empty:
        hrv_results = pd.concat([hrv_results, hrv_loop_results], ignore_index=True)
        # After concatenation, remove duplicates based on all columns
        hrv_results = hrv_results.drop_duplicates()

    return df, dfmeta, segmenter_main_output_df, plots, hrv_results


## MAIN FUNCTION

In [25]:
import os
import logging
import pandas as pd

def main(segmenter_main_output_df, hrv_results, main_directory_path, save_path="intermediate_results.xlsx"):
    """
    Process renamed folder structure, iterating through each folder to find and process ECG files
    in the 'WaveformData' subfolder using the segmenter_main function.

    Args:
        segmenter_main_output_df (pd.DataFrame): DataFrame for storing segmenter_main outputs.
        hrv_results (pd.DataFrame): DataFrame for storing HRV results.
        main_directory_path (str): Path to the renamed folder structure.
        save_path (str): Path to save the intermediate results.

    Returns:
        tuple: Updated DataFrames (segmenter_main_output_df, hrv_results)
    """
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    # Metadata configuration
    dfmeta = {
        'a_control': 0.8,  # Adaptive hypoxia drop threshold parameter
        'start_cutoff': 2 * 12,
        'min_duration': 60,
        'drop_search_bracket_left': 20,
        'drop_search_bracket_right': 40,
        'end_cutoff': 3 * 12,
        'r_control': 0.90,
        'recovery_search_bracket_left': 25,
        'recovery_search_bracket_right': 15,
    }

    plots = []  # List to store all Plotly figures

    # Ensure intermediate save file exists or create a new one
    if not os.path.exists(save_path):
        with pd.ExcelWriter(save_path) as writer:
            segmenter_main_output_df.to_excel(writer, sheet_name="Segmenter_Main_Output", index=False)
            hrv_results.to_excel(writer, sheet_name="HRV_Results", index=False)
    else:
        logging.info(f"Loading intermediate results from {save_path}.")
        with pd.ExcelFile(save_path) as reader:
            segmenter_main_output_df = pd.read_excel(reader, sheet_name="Segmenter_Main_Output")
            hrv_results = pd.read_excel(reader, sheet_name="HRV_Results")

    # Iterate through each folder in the main directory
    for folder_name in os.listdir(main_directory_path):
        folder_path = os.path.join(main_directory_path, folder_name)

        if os.path.isdir(folder_path):
            parameter_data_path = os.path.join(folder_path, "ParameterData")

            # Check if 'ParameterData' subfolder exists
            if os.path.exists(parameter_data_path) and os.path.isdir(parameter_data_path):
                # Look for the renamed CSV file in the 'ParameterData' subfolder
                for filename in os.listdir(parameter_data_path):
                    if filename.endswith('.csv') and filename.startswith(folder_name):
                        file_path = os.path.join(parameter_data_path, filename)
                        logging.info(f'Processing file: {file_path}')

                        try:
                            # Find the highest-priority ECG CSV file from the corresponding folder
                            ecg_path, dfmeta = find_highest_priority_ecg_csv(folder_path, dfmeta)
                            if ecg_path:
                                logging.info(f'Highest-priority ECG file found: {ecg_path}')
                            else:
                                logging.warning(f'No suitable ECG file found for folder: {folder_path}')
                                continue  # Skip this folder

                            # Call segmenter_main on the found CSV file and pass the ECG file path
                            df, dfmeta, segmenter_main_output_df, file_plots, hrv_results = segmenter_main(
                                file_path, dfmeta, segmenter_main_output_df, ecg_path, hrv_results
                            )

                            # Append the generated plots from this file to the overall list, checking for None
                            valid_plots = [plot for plot in file_plots if plot is not None]
                            plots.extend(valid_plots)

                            # Save intermediate results
                            with pd.ExcelWriter(save_path) as writer:
                                segmenter_main_output_df.to_excel(writer, sheet_name="Segmenter_Main_Output", index=False)
                                hrv_results.to_excel(writer, sheet_name="HRV_Results", index=False)

                        except Exception as e:
                            logging.error(f"Error processing file {file_path}: {e}", exc_info=True)
                            continue  # Skip the file and proceed with the next one
                        break  # Exit after processing the first valid CSV in the subfolder

    # Save all the collected plots into one HTML file
    if plots:
        save_plots_as_html(plots, output_file='combined_plots.html')
    else:
        logging.warning('No valid plots to save.')

    logging.info(f"All processing complete. Results saved to {save_path}.")
    return segmenter_main_output_df, hrv_results


## RUN Main()

In [26]:
segmenter_main_output_df = pd.DataFrame()
hrv_results = pd.DataFrame()
main_directory_path = r'Data\batch_3_reID_2_clean_paired'

#logging.getLogger().setLevel(logging.CRITICAL)
logging.getLogger().setLevel(logging.WARNING)
#logging.getLogger().setLevel(logging.ERROR)
#logging.getLogger().setLevel(logging.INFO)

# Suppress logging
#logging.disable(logging.CRITICAL)
# Re-enable logging
#logging.disable(logging.NOTSET)

In [27]:
hrv_results

In [30]:
segmenter_main_output_df, hrv_results = main(segmenter_main_output_df, hrv_results, main_directory_path)

INFO:root:Loading intermediate results from intermediate_results.xlsx.
INFO:root:Processing file: Data\batch_3_reID_2_clean_paired\after_ID_01\ParameterData\after_ID_01.csv
INFO:root:Found valid ECG file: Data\batch_3_reID_2_clean_paired\after_ID_01\WaveformData\ECG_II-20210111121632~20210111130110.csv
INFO:root:Sample rate for ECG_II: 256
INFO:root:Highest-priority ECG file found: Data\batch_3_reID_2_clean_paired\after_ID_01\WaveformData\ECG_II-20210111121632~20210111130110.csv
INFO:root:Status: after, Subject ID: ID_01
INFO:root:drop adaptive threshold: 98.57
INFO:root:drop threshold cross index: 265
INFO:root:drop_search_window_left: 245
INFO:root:drop_search_window_right: 305
INFO:root:drop_max_value and index: 100.0, 263
INFO:root:drop_min_value and index: 94.0, 276
INFO:root:recovery adaptive threshold: 99.28
INFO:root:reverse recovery threshold cross index: 156
INFO:root:recovery threshold cross index: 448
INFO:root:recovery_search_window_left: 423
INFO:root:recovery_search_wind

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_01  after   pre_normoxia      spo2    136   95.00  100.00   
1       ID_01  after   pre_normoxia    hr_bpm    146   76.00  103.00   
2       ID_01  after   pre_normoxia        pi    136    1.01    6.83   
3       ID_01  after   pre_normoxia       qtc    132  384.00  401.00   
4       ID_01  after        hypoxia      spo2    112   93.00   97.00   
5       ID_01  after        hypoxia    hr_bpm    112   83.00  105.00   
6       ID_01  after        hypoxia        pi    112    1.50    3.54   
7       ID_01  after        hypoxia       qtc    112  397.00  400.00   
8       ID_01  after  post_normoxia      spo2    108   98.00  100.00   
9       ID_01  after  post_normoxia    hr_bpm    108   70.00   90.00   
10      ID_01  after  post_normoxia        pi    108    1.31    4.95   
11      ID_01  after  post_normoxia       qtc    108  398.00  414.00   
12      ID_01  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_03  after   pre_normoxia      spo2    233   95.00   99.00   
1       ID_03  after   pre_normoxia    hr_bpm    233   67.00   91.00   
2       ID_03  after   pre_normoxia        pi    233   12.20   20.00   
3       ID_03  after   pre_normoxia       qtc    233  382.00  392.00   
4       ID_03  after        hypoxia      spo2    124   84.00   96.00   
5       ID_03  after        hypoxia    hr_bpm    124   83.00   98.00   
6       ID_03  after        hypoxia        pi    124    5.98   18.10   
7       ID_03  after        hypoxia       qtc    124  377.00  389.00   
8       ID_03  after  post_normoxia      spo2     94   99.00  100.00   
9       ID_03  after  post_normoxia    hr_bpm     94   68.00   90.00   
10      ID_03  after  post_normoxia        pi     94    3.14   12.20   
11      ID_03  after  post_normoxia       qtc     94  384.00  391.00   
12      ID_03  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_04  after   pre_normoxia      spo2     55   96.00   98.00   
1       ID_04  after   pre_normoxia    hr_bpm     55   70.00   96.00   
2       ID_04  after   pre_normoxia        pi     55    9.83   12.90   
3       ID_04  after   pre_normoxia       qtc     55  381.00  397.00   
4       ID_04  after        hypoxia      spo2     94   85.00   94.00   
5       ID_04  after        hypoxia    hr_bpm     94   85.00  112.00   
6       ID_04  after        hypoxia        pi     94    3.78   10.00   
7       ID_04  after        hypoxia       qtc     94  389.00  404.00   
8       ID_04  after  post_normoxia      spo2     74   98.00  100.00   
9       ID_04  after  post_normoxia    hr_bpm     74   76.00   85.00   
10      ID_04  after  post_normoxia        pi     74    5.31    8.59   
11      ID_04  after  post_normoxia       qtc     74  391.00  401.00   
12      ID_04  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_05  after   pre_normoxia      spo2     91   94.00  100.00   
1       ID_05  after   pre_normoxia    hr_bpm    108   56.00  101.00   
2       ID_05  after   pre_normoxia        pi     92    0.82    4.41   
3       ID_05  after   pre_normoxia       qtc     94  409.00  429.00   
4       ID_05  after        hypoxia      spo2     85   88.00   98.00   
5       ID_05  after        hypoxia    hr_bpm     85   64.00  111.00   
6       ID_05  after        hypoxia        pi     80    0.45    1.51   
7       ID_05  after        hypoxia       qtc     85  420.00  434.00   
8       ID_05  after  post_normoxia      spo2     27   99.00  100.00   
9       ID_05  after  post_normoxia    hr_bpm     27   58.00   74.00   
10      ID_05  after  post_normoxia        pi     27    0.38    1.51   
11      ID_05  after  post_normoxia       qtc     27  433.00  435.00   
12      ID_05  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_06  after   pre_normoxia      spo2    221   96.00   99.00   
1       ID_06  after   pre_normoxia    hr_bpm    223   60.00   85.00   
2       ID_06  after   pre_normoxia        pi    221    2.65   14.30   
3       ID_06  after   pre_normoxia       qtc    210  376.00  389.00   
4       ID_06  after        hypoxia      spo2    120   93.00   95.00   
5       ID_06  after        hypoxia    hr_bpm    120   78.00  103.00   
6       ID_06  after        hypoxia        pi    120    1.99    4.92   
7       ID_06  after        hypoxia       qtc    120  381.00  388.00   
8       ID_06  after  post_normoxia      spo2    105   97.00  100.00   
9       ID_06  after  post_normoxia    hr_bpm    105   62.00   98.00   
10      ID_06  after  post_normoxia        pi    105    3.55   12.30   
11      ID_06  after  post_normoxia       qtc    105  383.00  391.00   
12      ID_06  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_07  after   pre_normoxia      spo2    257   97.00  100.00   
1       ID_07  after   pre_normoxia    hr_bpm    259   58.00   97.00   
2       ID_07  after   pre_normoxia        pi    258    0.64    4.66   
3       ID_07  after   pre_normoxia       qtc    245  421.00  433.00   
4       ID_07  after        hypoxia      spo2     98   90.00   98.00   
5       ID_07  after        hypoxia    hr_bpm     98   74.00   95.00   
6       ID_07  after        hypoxia        pi     98    0.45    2.61   
7       ID_07  after        hypoxia       qtc     98  440.00  443.00   
8       ID_07  after  post_normoxia      spo2     26   98.00  100.00   
9       ID_07  after  post_normoxia    hr_bpm     26   78.00   88.00   
10      ID_07  after  post_normoxia        pi     26    0.36    0.67   
11      ID_07  after  post_normoxia       qtc     26  444.00  447.00   
12      ID_07  after           drop      spo


Mean of empty slice

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_08  after   pre_normoxia      spo2      2   99.00   99.00   
1       ID_08  after   pre_normoxia    hr_bpm      2   84.00   86.00   
2       ID_08  after   pre_normoxia        pi      2    1.15    1.28   
3       ID_08  after   pre_normoxia       qtc      0     NaN     NaN   
4       ID_08  after        hypoxia      spo2     83   89.00   93.00   
5       ID_08  after        hypoxia    hr_bpm     83  102.00  124.00   
6       ID_08  after        hypoxia        pi     83    2.12    3.79   
7       ID_08  after        hypoxia       qtc     83  391.00  416.00   
8       ID_08  after  post_normoxia      spo2      0     NaN     NaN   
9       ID_08  after  post_normoxia    hr_bpm      0     NaN     NaN   
10      ID_08  after  post_normoxia        pi      0     NaN     NaN   
11      ID_08  after  post_normoxia       qtc      0     NaN     NaN   
12      ID_08  after  

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_09  after   pre_normoxia      spo2      9   99.00   99.00   
1       ID_09  after   pre_normoxia    hr_bpm     22   51.00   56.00   
2       ID_09  after   pre_normoxia        pi      9    4.67    6.72   
3       ID_09  after   pre_normoxia       qtc      8  430.00  430.00   
4       ID_09  after        hypoxia      spo2    100   83.00   92.00   
5       ID_09  after        hypoxia    hr_bpm    100   67.00   81.00   
6       ID_09  after        hypoxia        pi    100    1.33    4.21   
7       ID_09  after        hypoxia       qtc    100  426.00  430.00   
8       ID_09  after  post_normoxia      spo2     20  100.00  100.00   
9       ID_09  after  post_normoxia    hr_bpm     20   59.00   68.00   
10      ID_09  after  post_normoxia        pi     20    2.47    4.84   
11      ID_09  after  post_normoxia       qtc     20  419.00  434.00   
12      ID_09  after           drop      spo


Mean of empty slice

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_10  after   pre_normoxia      spo2     11   98.00   99.00   
1       ID_10  after   pre_normoxia    hr_bpm     16   73.00   82.00   
2       ID_10  after   pre_normoxia        pi     12    3.02   10.90   
3       ID_10  after   pre_normoxia       qtc      0     NaN     NaN   
4       ID_10  after        hypoxia      spo2     92   83.00   96.00   
5       ID_10  after        hypoxia    hr_bpm     92   88.00  103.00   
6       ID_10  after        hypoxia        pi     92    1.62    6.66   
7       ID_10  after        hypoxia       qtc     92  423.00  429.00   
8       ID_10  after  post_normoxia      spo2     70   96.00  100.00   
9       ID_10  after  post_normoxia    hr_bpm     70   68.00   90.00   
10      ID_10  after  post_normoxia        pi     70    1.91   10.10   
11      ID_10  after  post_normoxia       qtc     70  427.00  432.00   
12      ID_10  after  

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_12  after   pre_normoxia      spo2     89   95.00  100.00   
1       ID_12  after   pre_normoxia    hr_bpm    101   74.00   93.00   
2       ID_12  after   pre_normoxia        pi     90    1.87    5.62   
3       ID_12  after   pre_normoxia       qtc     87  399.00  426.00   
4       ID_12  after        hypoxia      spo2    107   83.00   93.00   
5       ID_12  after        hypoxia    hr_bpm    107   86.00   96.00   
6       ID_12  after        hypoxia        pi    107    4.78   10.90   
7       ID_12  after        hypoxia       qtc    107  428.00  433.00   
8       ID_12  after  post_normoxia      spo2     99   94.00  100.00   
9       ID_12  after  post_normoxia    hr_bpm     99   69.00   85.00   
10      ID_12  after  post_normoxia        pi     99    4.01   14.50   
11      ID_12  after  post_normoxia       qtc     99  426.00  440.00   
12      ID_12  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_13  after   pre_normoxia      spo2     61   96.00  100.00   
1       ID_13  after   pre_normoxia    hr_bpm     63   75.00  106.00   
2       ID_13  after   pre_normoxia        pi     62    1.73    3.10   
3       ID_13  after   pre_normoxia       qtc     49  439.00  468.00   
4       ID_13  after        hypoxia      spo2    113   86.00   95.00   
5       ID_13  after        hypoxia    hr_bpm    113   88.00  144.00   
6       ID_13  after        hypoxia        pi    113    1.70    3.82   
7       ID_13  after        hypoxia       qtc    113  447.00  459.00   
8       ID_13  after  post_normoxia      spo2     88   98.00  100.00   
9       ID_13  after  post_normoxia    hr_bpm     88   73.00  111.00   
10      ID_13  after  post_normoxia        pi     88    1.24    4.07   
11      ID_13  after  post_normoxia       qtc     88  452.00  460.00   
12      ID_13  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_14  after   pre_normoxia      spo2    128   95.00  100.00   
1       ID_14  after   pre_normoxia    hr_bpm    134   72.00   92.00   
2       ID_14  after   pre_normoxia        pi    129    2.09   10.10   
3       ID_14  after   pre_normoxia       qtc    121  424.00  436.00   
4       ID_14  after        hypoxia      spo2     86   89.00   94.00   
5       ID_14  after        hypoxia    hr_bpm     86   85.00  105.00   
6       ID_14  after        hypoxia        pi     86    1.95    7.55   
7       ID_14  after        hypoxia       qtc     86  420.00  425.00   
8       ID_14  after  post_normoxia      spo2     76   96.00  100.00   
9       ID_14  after  post_normoxia    hr_bpm     76   69.00  103.00   
10      ID_14  after  post_normoxia        pi     76    1.28    8.62   
11      ID_14  after  post_normoxia       qtc     76  423.00  433.00   
12      ID_14  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_15  after   pre_normoxia      spo2     27   97.00  100.00   
1       ID_15  after   pre_normoxia    hr_bpm     39   88.00  121.00   
2       ID_15  after   pre_normoxia        pi     27    0.74    1.68   
3       ID_15  after   pre_normoxia       qtc     24  419.00  421.00   
4       ID_15  after        hypoxia      spo2    105   87.00   94.00   
5       ID_15  after        hypoxia    hr_bpm    105   94.00  127.00   
6       ID_15  after        hypoxia        pi    105    0.83    1.38   
7       ID_15  after        hypoxia       qtc    105  413.00  421.00   
8       ID_15  after  post_normoxia      spo2     44   99.00  100.00   
9       ID_15  after  post_normoxia    hr_bpm     44   86.00  115.00   
10      ID_15  after  post_normoxia        pi     44    0.50    0.91   
11      ID_15  after  post_normoxia       qtc     44  403.00  424.00   
12      ID_15  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_18  after   pre_normoxia      spo2    102   94.00   99.00   
1       ID_18  after   pre_normoxia    hr_bpm    102   82.00   93.00   
2       ID_18  after   pre_normoxia        pi    102    2.79    7.17   
3       ID_18  after   pre_normoxia       qtc    102  430.00  433.00   
4       ID_18  after        hypoxia      spo2     93   84.00   93.00   
5       ID_18  after        hypoxia    hr_bpm     93   84.00  101.00   
6       ID_18  after        hypoxia        pi     93    2.35    5.34   
7       ID_18  after        hypoxia       qtc     93  431.00  435.00   
8       ID_18  after  post_normoxia      spo2      8  100.00  100.00   
9       ID_18  after  post_normoxia    hr_bpm      8   78.00   80.00   
10      ID_18  after  post_normoxia        pi      8    5.36    8.45   
11      ID_18  after  post_normoxia       qtc      8  440.00  440.00   
12      ID_18  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_19  after   pre_normoxia      spo2     45   98.00  100.00   
1       ID_19  after   pre_normoxia    hr_bpm     45   80.00   95.00   
2       ID_19  after   pre_normoxia        pi     45    2.71    5.08   
3       ID_19  after   pre_normoxia       qtc     45  408.00  414.00   
4       ID_19  after        hypoxia      spo2    139   87.00   96.00   
5       ID_19  after        hypoxia    hr_bpm    139   78.00  118.00   
6       ID_19  after        hypoxia        pi    139    2.28    5.82   
7       ID_19  after        hypoxia       qtc    139  416.00  426.00   
8       ID_19  after  post_normoxia      spo2    108   99.00  100.00   
9       ID_19  after  post_normoxia    hr_bpm    108   74.00   91.00   
10      ID_19  after  post_normoxia        pi    108    1.86    7.42   
11      ID_19  after  post_normoxia       qtc    108  417.00  428.00   
12      ID_19  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_23  after   pre_normoxia      spo2    144   96.00  100.00   
1       ID_23  after   pre_normoxia    hr_bpm    144   70.00   91.00   
2       ID_23  after   pre_normoxia        pi    144    0.45    5.62   
3       ID_23  after   pre_normoxia       qtc    140  425.00  445.00   
4       ID_23  after        hypoxia      spo2     99   93.00   97.00   
5       ID_23  after        hypoxia    hr_bpm     99   77.00   92.00   
6       ID_23  after        hypoxia        pi     99    2.22    6.59   
7       ID_23  after        hypoxia       qtc     99  427.00  446.00   
8       ID_23  after  post_normoxia      spo2     42  100.00  100.00   
9       ID_23  after  post_normoxia    hr_bpm     42   79.00  115.00   
10      ID_23  after  post_normoxia        pi     42    1.79    4.35   
11      ID_23  after  post_normoxia       qtc     42  439.00  454.00   
12      ID_23  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_25  after   pre_normoxia      spo2     51   96.00   99.00   
1       ID_25  after   pre_normoxia    hr_bpm     55   90.00  110.00   
2       ID_25  after   pre_normoxia        pi     51    1.32    3.19   
3       ID_25  after   pre_normoxia       qtc     41  413.00  417.00   
4       ID_25  after        hypoxia      spo2    111   87.00   95.00   
5       ID_25  after        hypoxia    hr_bpm    111   87.00  129.00   
6       ID_25  after        hypoxia        pi    111    1.24    3.74   
7       ID_25  after        hypoxia       qtc    111  412.00  431.00   
8       ID_25  after  post_normoxia      spo2      7  100.00  100.00   
9       ID_25  after  post_normoxia    hr_bpm      7   86.00   95.00   
10      ID_25  after  post_normoxia        pi      7    1.28    2.57   
11      ID_25  after  post_normoxia       qtc      7  425.00  430.00   
12      ID_25  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_27  after   pre_normoxia      spo2     28   99.00  100.00   
1       ID_27  after   pre_normoxia    hr_bpm     28   71.00   85.00   
2       ID_27  after   pre_normoxia        pi     28    1.76    4.87   
3       ID_27  after   pre_normoxia       qtc     28  378.00  389.00   
4       ID_27  after        hypoxia      spo2    124   89.00   96.00   
5       ID_27  after        hypoxia    hr_bpm    124   79.00  104.00   
6       ID_27  after        hypoxia        pi    124    1.36    3.35   
7       ID_27  after        hypoxia       qtc    124  380.00  390.00   
8       ID_27  after  post_normoxia      spo2    105   87.00  100.00   
9       ID_27  after  post_normoxia    hr_bpm    109   71.00   89.00   
10      ID_27  after  post_normoxia        pi    105    0.49    6.19   
11      ID_27  after  post_normoxia       qtc    109  385.00  392.00   
12      ID_27  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_28  after   pre_normoxia      spo2    109   99.00  100.00   
1       ID_28  after   pre_normoxia    hr_bpm    109   82.00   95.00   
2       ID_28  after   pre_normoxia        pi    109    0.99    4.60   
3       ID_28  after   pre_normoxia       qtc    109  380.00  389.00   
4       ID_28  after        hypoxia      spo2    132   86.00   96.00   
5       ID_28  after        hypoxia    hr_bpm    132   93.00  113.00   
6       ID_28  after        hypoxia        pi    132    1.28    2.81   
7       ID_28  after        hypoxia       qtc    132  389.00  396.00   
8       ID_28  after  post_normoxia      spo2    266   98.00  100.00   
9       ID_28  after  post_normoxia    hr_bpm    269   74.00   94.00   
10      ID_28  after  post_normoxia        pi    266    0.25    5.04   
11      ID_28  after  post_normoxia       qtc    269  384.00  400.00   
12      ID_28  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_30  after   pre_normoxia      spo2    188   97.00  100.00   
1       ID_30  after   pre_normoxia    hr_bpm    188   79.00  140.00   
2       ID_30  after   pre_normoxia        pi    188    0.61    2.77   
3       ID_30  after   pre_normoxia       qtc    188  410.00  421.00   
4       ID_30  after        hypoxia      spo2    134   91.00   96.00   
5       ID_30  after        hypoxia    hr_bpm    134  100.00  133.00   
6       ID_30  after        hypoxia        pi    134    0.54    1.82   
7       ID_30  after        hypoxia       qtc    134  412.00  423.00   
8       ID_30  after  post_normoxia      spo2    186   99.00  100.00   
9       ID_30  after  post_normoxia    hr_bpm    187   84.00  111.00   
10      ID_30  after  post_normoxia        pi    186    0.50    3.41   
11      ID_30  after  post_normoxia       qtc    187  421.00  426.00   
12      ID_30  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_31  after   pre_normoxia      spo2     71   96.00   99.00   
1       ID_31  after   pre_normoxia    hr_bpm     71   74.00   97.00   
2       ID_31  after   pre_normoxia        pi     71    4.78   10.30   
3       ID_31  after   pre_normoxia       qtc     62  408.00  415.00   
4       ID_31  after        hypoxia      spo2    100   85.00   90.00   
5       ID_31  after        hypoxia    hr_bpm    100  116.00  139.00   
6       ID_31  after        hypoxia        pi    100    1.32    3.80   
7       ID_31  after        hypoxia       qtc    100  413.00  429.00   
8       ID_31  after  post_normoxia      spo2    182   97.00  100.00   
9       ID_31  after  post_normoxia    hr_bpm    182   62.00  116.00   
10      ID_31  after  post_normoxia        pi    182    1.02    9.06   
11      ID_31  after  post_normoxia       qtc    182  411.00  460.00   
12      ID_31  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_32  after   pre_normoxia      spo2      2   98.00   98.00   
1       ID_32  after   pre_normoxia    hr_bpm      2   89.00   90.00   
2       ID_32  after   pre_normoxia        pi      2    3.79    4.38   
3       ID_32  after   pre_normoxia       qtc      2  407.00  407.00   
4       ID_32  after        hypoxia      spo2     92   86.00   96.00   
5       ID_32  after        hypoxia    hr_bpm     92  107.00  119.00   
6       ID_32  after        hypoxia        pi     92    1.80    3.57   
7       ID_32  after        hypoxia       qtc     92  404.00  419.00   
8       ID_32  after  post_normoxia      spo2     34   97.00  100.00   
9       ID_32  after  post_normoxia    hr_bpm     35   92.00  114.00   
10      ID_32  after  post_normoxia        pi     35    1.76   20.00   
11      ID_32  after  post_normoxia       qtc     35  418.00  421.00   
12      ID_32  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_33  after   pre_normoxia      spo2     83   97.00  100.00   
1       ID_33  after   pre_normoxia    hr_bpm     89   76.00  111.00   
2       ID_33  after   pre_normoxia        pi     84    1.04    2.02   
3       ID_33  after   pre_normoxia       qtc     75  426.00  459.00   
4       ID_33  after        hypoxia      spo2    122   90.00   96.00   
5       ID_33  after        hypoxia    hr_bpm    122   86.00  131.00   
6       ID_33  after        hypoxia        pi    122    1.08    1.66   
7       ID_33  after        hypoxia       qtc    122  447.00  482.00   
8       ID_33  after  post_normoxia      spo2     99   99.00  100.00   
9       ID_33  after  post_normoxia    hr_bpm     99   72.00  112.00   
10      ID_33  after  post_normoxia        pi     99    1.01    3.47   
11      ID_33  after  post_normoxia       qtc     99  433.00  485.00   
12      ID_33  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_34  after   pre_normoxia      spo2    104   98.00  100.00   
1       ID_34  after   pre_normoxia    hr_bpm    109   88.00  124.00   
2       ID_34  after   pre_normoxia        pi    105    1.17    4.32   
3       ID_34  after   pre_normoxia       qtc    100  407.00  416.00   
4       ID_34  after        hypoxia      spo2     55   80.00   89.00   
5       ID_34  after        hypoxia    hr_bpm     55  136.00  150.00   
6       ID_34  after        hypoxia        pi     55    0.78    1.69   
7       ID_34  after        hypoxia       qtc     55  412.00  421.00   
8       ID_34  after  post_normoxia      spo2    109   97.00  100.00   
9       ID_34  after  post_normoxia    hr_bpm    109   87.00  120.00   
10      ID_34  after  post_normoxia        pi    109    1.49    6.17   
11      ID_34  after  post_normoxia       qtc    109  416.00  424.00   
12      ID_34  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_35  after   pre_normoxia      spo2    140   96.00   99.00   
1       ID_35  after   pre_normoxia    hr_bpm    143   73.00   95.00   
2       ID_35  after   pre_normoxia        pi    140    8.60   20.00   
3       ID_35  after   pre_normoxia       qtc    128  419.00  437.00   
4       ID_35  after        hypoxia      spo2    118   87.00   96.00   
5       ID_35  after        hypoxia    hr_bpm    118   70.00  102.00   
6       ID_35  after        hypoxia        pi    118    2.02    5.99   
7       ID_35  after        hypoxia       qtc    118  423.00  441.00   
8       ID_35  after  post_normoxia      spo2     52  100.00  100.00   
9       ID_35  after  post_normoxia    hr_bpm     52   79.00   90.00   
10      ID_35  after  post_normoxia        pi     52    2.35   11.90   
11      ID_35  after  post_normoxia       qtc     52  429.00  441.00   
12      ID_35  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_37  after   pre_normoxia      spo2     28   98.00  100.00   
1       ID_37  after   pre_normoxia    hr_bpm     28   77.00   84.00   
2       ID_37  after   pre_normoxia        pi     28    1.01    1.39   
3       ID_37  after   pre_normoxia       qtc     25  391.00  398.00   
4       ID_37  after        hypoxia      spo2    105   86.00   95.00   
5       ID_37  after        hypoxia    hr_bpm    105   91.00  108.00   
6       ID_37  after        hypoxia        pi    105    1.25    2.61   
7       ID_37  after        hypoxia       qtc    105  396.00  410.00   
8       ID_37  after  post_normoxia      spo2     27   99.00  100.00   
9       ID_37  after  post_normoxia    hr_bpm     31   79.00  110.00   
10      ID_37  after  post_normoxia        pi     27    1.21    6.73   
11      ID_37  after  post_normoxia       qtc     31  396.00  412.00   
12      ID_37  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_38  after   pre_normoxia      spo2     90   91.00  100.00   
1       ID_38  after   pre_normoxia    hr_bpm     99   78.00  100.00   
2       ID_38  after   pre_normoxia        pi     91    2.28    7.38   
3       ID_38  after   pre_normoxia       qtc     86  432.00  443.00   
4       ID_38  after        hypoxia      spo2     81   90.00   95.00   
5       ID_38  after        hypoxia    hr_bpm     81   78.00  101.00   
6       ID_38  after        hypoxia        pi     81    1.99    5.74   
7       ID_38  after        hypoxia       qtc     81  434.00  440.00   
8       ID_38  after  post_normoxia      spo2     60   97.00  100.00   
9       ID_38  after  post_normoxia    hr_bpm     61   69.00   90.00   
10      ID_38  after  post_normoxia        pi     60    1.21    3.16   
11      ID_38  after  post_normoxia       qtc     65  435.00  443.00   
12      ID_38  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_39  after   pre_normoxia      spo2      8   98.00   99.00   
1       ID_39  after   pre_normoxia    hr_bpm      8   78.00   87.00   
2       ID_39  after   pre_normoxia        pi      8    7.24    9.51   
3       ID_39  after   pre_normoxia       qtc      8  417.00  417.00   
4       ID_39  after        hypoxia      spo2     96   84.00   91.00   
5       ID_39  after        hypoxia    hr_bpm     96  102.00  120.00   
6       ID_39  after        hypoxia        pi     96    2.31    3.95   
7       ID_39  after        hypoxia       qtc     96  414.00  438.00   
8       ID_39  after  post_normoxia      spo2     73   99.00  100.00   
9       ID_39  after  post_normoxia    hr_bpm     73   74.00   97.00   
10      ID_39  after  post_normoxia        pi     73    2.03    5.94   
11      ID_39  after  post_normoxia       qtc     73  412.00  419.00   
12      ID_39  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_40  after   pre_normoxia      spo2     54   98.00   99.00   
1       ID_40  after   pre_normoxia    hr_bpm     56   61.00   81.00   
2       ID_40  after   pre_normoxia        pi     55    4.45    8.72   
3       ID_40  after   pre_normoxia       qtc     42  383.00  410.00   
4       ID_40  after        hypoxia      spo2     78   89.00   95.00   
5       ID_40  after        hypoxia    hr_bpm     78   73.00   89.00   
6       ID_40  after        hypoxia        pi     78    3.34   14.80   
7       ID_40  after        hypoxia       qtc     78  383.00  404.00   
8       ID_40  after  post_normoxia      spo2    101   97.00  100.00   
9       ID_40  after  post_normoxia    hr_bpm    101   60.00   75.00   
10      ID_40  after  post_normoxia        pi    101    4.25   12.00   
11      ID_40  after  post_normoxia       qtc    101  383.00  394.00   
12      ID_40  after           drop      spo

INFO:root:stats_df:
   subject_id status          phase parameter  count     min     max  \
0       ID_42  after   pre_normoxia      spo2    156   97.00  100.00   
1       ID_42  after   pre_normoxia    hr_bpm    164   70.00   86.00   
2       ID_42  after   pre_normoxia        pi    156    0.46    1.11   
3       ID_42  after   pre_normoxia       qtc    154  399.00  424.00   
4       ID_42  after        hypoxia      spo2    114   91.00   97.00   
5       ID_42  after        hypoxia    hr_bpm    114   80.00  106.00   
6       ID_42  after        hypoxia        pi    114    0.41    0.84   
7       ID_42  after        hypoxia       qtc    114  423.00  436.00   
8       ID_42  after  post_normoxia      spo2     77   98.00  100.00   
9       ID_42  after  post_normoxia    hr_bpm     77   70.00  106.00   
10      ID_42  after  post_normoxia        pi     77    0.39    0.87   
11      ID_42  after  post_normoxia       qtc     77  432.00  441.00   
12      ID_42  after           drop      spo

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_01  before   pre_normoxia      spo2    103   97.00  100.00   
1       ID_01  before   pre_normoxia    hr_bpm    113   58.00   79.00   
2       ID_01  before   pre_normoxia        pi    103    0.30    5.71   
3       ID_01  before   pre_normoxia       qtc     99  405.00  418.00   
4       ID_01  before        hypoxia      spo2    133   92.00   96.00   
5       ID_01  before        hypoxia    hr_bpm    133   62.00   77.00   
6       ID_01  before        hypoxia        pi    133    0.79    1.43   
7       ID_01  before        hypoxia       qtc    133  404.00  419.00   
8       ID_01  before  post_normoxia      spo2     43   99.00  100.00   
9       ID_01  before  post_normoxia    hr_bpm     43   60.00   70.00   
10      ID_01  before  post_normoxia        pi     43    0.68    1.18   
11      ID_01  before  post_normoxia       qtc     43  407.00  409.00   
12      ID_01  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_03  before   pre_normoxia      spo2    353   94.00  100.00   
1       ID_03  before   pre_normoxia    hr_bpm    353   62.00   88.00   
2       ID_03  before   pre_normoxia        pi    353    1.03   20.00   
3       ID_03  before   pre_normoxia       qtc    353  376.00  383.00   
4       ID_03  before        hypoxia      spo2    141   85.00   94.00   
5       ID_03  before        hypoxia    hr_bpm    141   74.00  100.00   
6       ID_03  before        hypoxia        pi    141    2.70    7.45   
7       ID_03  before        hypoxia       qtc    141  390.00  398.00   
8       ID_03  before  post_normoxia      spo2      4   99.00   99.00   
9       ID_03  before  post_normoxia    hr_bpm      4   80.00   83.00   
10      ID_03  before  post_normoxia        pi      4    1.76    2.03   
11      ID_03  before  post_normoxia       qtc      4  391.00  391.00   
12      ID_03  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_04  before   pre_normoxia      spo2     19   98.00   99.00   
1       ID_04  before   pre_normoxia    hr_bpm     19   68.00   76.00   
2       ID_04  before   pre_normoxia        pi     19    1.80    2.45   
3       ID_04  before   pre_normoxia       qtc     19  407.00  409.00   
4       ID_04  before        hypoxia      spo2     79   80.00   95.00   
5       ID_04  before        hypoxia    hr_bpm     79   68.00   87.00   
6       ID_04  before        hypoxia        pi     79    1.57    3.86   
7       ID_04  before        hypoxia       qtc     79  406.00  412.00   
8       ID_04  before  post_normoxia      spo2    140   96.00  100.00   
9       ID_04  before  post_normoxia    hr_bpm    140   57.00   77.00   
10      ID_04  before  post_normoxia        pi    140    1.54    6.16   
11      ID_04  before  post_normoxia       qtc    140  408.00  420.00   
12      ID_04  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_05  before   pre_normoxia      spo2    161   99.00  100.00   
1       ID_05  before   pre_normoxia    hr_bpm    163   46.00   84.00   
2       ID_05  before   pre_normoxia        pi    162    3.69    9.31   
3       ID_05  before   pre_normoxia       qtc    156  422.00  452.00   
4       ID_05  before        hypoxia      spo2     91   86.00   94.00   
5       ID_05  before        hypoxia    hr_bpm     91   55.00   92.00   
6       ID_05  before        hypoxia        pi     91    3.23    8.76   
7       ID_05  before        hypoxia       qtc     91  417.00  425.00   
8       ID_05  before  post_normoxia      spo2     22  100.00  100.00   
9       ID_05  before  post_normoxia    hr_bpm     22   53.00   71.00   
10      ID_05  before  post_normoxia        pi     22    2.68    5.69   
11      ID_05  before  post_normoxia       qtc     22  409.00  419.00   
12      ID_05  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_06  before   pre_normoxia      spo2     36   96.00   99.00   
1       ID_06  before   pre_normoxia    hr_bpm     39   68.00   87.00   
2       ID_06  before   pre_normoxia        pi     36    6.50    8.86   
3       ID_06  before   pre_normoxia       qtc     29  397.00  404.00   
4       ID_06  before        hypoxia      spo2    121   92.00   98.00   
5       ID_06  before        hypoxia    hr_bpm    121   92.00  110.00   
6       ID_06  before        hypoxia        pi    121    5.66    9.98   
7       ID_06  before        hypoxia       qtc    121  395.00  405.00   
8       ID_06  before  post_normoxia      spo2     35  100.00  100.00   
9       ID_06  before  post_normoxia    hr_bpm     35   87.00  105.00   
10      ID_06  before  post_normoxia        pi     35    4.07    5.90   
11      ID_06  before  post_normoxia       qtc     35  391.00  398.00   
12      ID_06  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_07  before   pre_normoxia      spo2     35   98.00  100.00   
1       ID_07  before   pre_normoxia    hr_bpm     35   62.00   82.00   
2       ID_07  before   pre_normoxia        pi     35    1.36    1.94   
3       ID_07  before   pre_normoxia       qtc     35  422.00  430.00   
4       ID_07  before        hypoxia      spo2    101   88.00   95.00   
5       ID_07  before        hypoxia    hr_bpm     99   76.00  104.00   
6       ID_07  before        hypoxia        pi    101    0.52    1.10   
7       ID_07  before        hypoxia       qtc    101  430.00  442.00   
8       ID_07  before  post_normoxia      spo2     40  100.00  100.00   
9       ID_07  before  post_normoxia    hr_bpm     40   68.00   87.00   
10      ID_07  before  post_normoxia        pi     40    0.44    2.08   
11      ID_07  before  post_normoxia       qtc     40  435.00  438.00   
12      ID_07  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_08  before   pre_normoxia      spo2     57   99.00  100.00   
1       ID_08  before   pre_normoxia    hr_bpm     58   74.00   86.00   
2       ID_08  before   pre_normoxia        pi     57    2.26    6.13   
3       ID_08  before   pre_normoxia       qtc     45  412.00  419.00   
4       ID_08  before        hypoxia      spo2     63   91.00   96.00   
5       ID_08  before        hypoxia    hr_bpm     63  114.00  134.00   
6       ID_08  before        hypoxia        pi     63    2.50    7.49   
7       ID_08  before        hypoxia       qtc     63  423.00  431.00   
8       ID_08  before  post_normoxia      spo2     23   99.00  100.00   
9       ID_08  before  post_normoxia    hr_bpm     23  101.00  108.00   
10      ID_08  before  post_normoxia        pi     23    1.68    3.70   
11      ID_08  before  post_normoxia       qtc     23  427.00  429.00   
12      ID_08  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_09  before   pre_normoxia      spo2     41   99.00  100.00   
1       ID_09  before   pre_normoxia    hr_bpm     67   51.00   61.00   
2       ID_09  before   pre_normoxia        pi     41    0.30    4.74   
3       ID_09  before   pre_normoxia       qtc     54  431.00  445.00   
4       ID_09  before        hypoxia      spo2    117   86.00   95.00   
5       ID_09  before        hypoxia    hr_bpm    117   61.00   78.00   
6       ID_09  before        hypoxia        pi    117    0.55    4.54   
7       ID_09  before        hypoxia       qtc    117  431.00  438.00   
8       ID_09  before  post_normoxia      spo2     68   97.00  100.00   
9       ID_09  before  post_normoxia    hr_bpm     94   48.00   70.00   
10      ID_09  before  post_normoxia        pi     70    0.81    3.92   
11      ID_09  before  post_normoxia       qtc     38  436.00  456.00   
12      ID_09  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_10  before   pre_normoxia      spo2     94   96.00  100.00   
1       ID_10  before   pre_normoxia    hr_bpm     94   67.00  106.00   
2       ID_10  before   pre_normoxia        pi     94    1.55    8.05   
3       ID_10  before   pre_normoxia       qtc     94  422.00  427.00   
4       ID_10  before        hypoxia      spo2    151   85.00   97.00   
5       ID_10  before        hypoxia    hr_bpm    151   77.00  106.00   
6       ID_10  before        hypoxia        pi    151    1.06    6.13   
7       ID_10  before        hypoxia       qtc    151  417.00  428.00   
8       ID_10  before  post_normoxia      spo2    142   96.00  100.00   
9       ID_10  before  post_normoxia    hr_bpm    142   65.00   83.00   
10      ID_10  before  post_normoxia        pi    142    1.71    8.52   
11      ID_10  before  post_normoxia       qtc    142  425.00  437.00   
12      ID_10  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_12  before   pre_normoxia      spo2      0     NaN     NaN   
1       ID_12  before   pre_normoxia    hr_bpm      0     NaN     NaN   
2       ID_12  before   pre_normoxia        pi      0     NaN     NaN   
3       ID_12  before   pre_normoxia       qtc      0     NaN     NaN   
4       ID_12  before        hypoxia      spo2    126   80.00   94.00   
5       ID_12  before        hypoxia    hr_bpm    126   67.00   85.00   
6       ID_12  before        hypoxia        pi    126    1.25    6.10   
7       ID_12  before        hypoxia       qtc    126  403.00  407.00   
8       ID_12  before  post_normoxia      spo2     65   96.00  100.00   
9       ID_12  before  post_normoxia    hr_bpm     65   61.00   81.00   
10      ID_12  before  post_normoxia        pi     65    1.65    5.53   
11      ID_12  before  post_normoxia       qtc     65  406.00  411.00   
12      ID_12  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_13  before   pre_normoxia      spo2    148   98.00  100.00   
1       ID_13  before   pre_normoxia    hr_bpm    148   70.00  109.00   
2       ID_13  before   pre_normoxia        pi    148    1.23    3.57   
3       ID_13  before   pre_normoxia       qtc    142  450.00  470.00   
4       ID_13  before        hypoxia      spo2    139   91.00   95.00   
5       ID_13  before        hypoxia    hr_bpm    139   84.00  154.00   
6       ID_13  before        hypoxia        pi    139    1.22    2.26   
7       ID_13  before        hypoxia       qtc    139  444.00  496.00   
8       ID_13  before  post_normoxia      spo2     96   99.00  100.00   
9       ID_13  before  post_normoxia    hr_bpm     96   76.00  106.00   
10      ID_13  before  post_normoxia        pi     96    0.85    2.61   
11      ID_13  before  post_normoxia       qtc     96  458.00  469.00   
12      ID_13  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_14  before   pre_normoxia      spo2      0     NaN     NaN   
1       ID_14  before   pre_normoxia    hr_bpm      0     NaN     NaN   
2       ID_14  before   pre_normoxia        pi      0     NaN     NaN   
3       ID_14  before   pre_normoxia       qtc      0     NaN     NaN   
4       ID_14  before        hypoxia      spo2    103   88.00   93.00   
5       ID_14  before        hypoxia    hr_bpm    103   77.00   95.00   
6       ID_14  before        hypoxia        pi    103    3.42   11.00   
7       ID_14  before        hypoxia       qtc    103  420.00  432.00   
8       ID_14  before  post_normoxia      spo2     45  100.00  100.00   
9       ID_14  before  post_normoxia    hr_bpm     45   63.00   80.00   
10      ID_14  before  post_normoxia        pi     45    4.62   13.10   
11      ID_14  before  post_normoxia       qtc     45  423.00  433.00   
12      ID_14  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_15  before   pre_normoxia      spo2     39   96.00  100.00   
1       ID_15  before   pre_normoxia    hr_bpm     45   72.00  109.00   
2       ID_15  before   pre_normoxia        pi     40    1.73    3.05   
3       ID_15  before   pre_normoxia       qtc     31  408.00  418.00   
4       ID_15  before        hypoxia      spo2    115   91.00   97.00   
5       ID_15  before        hypoxia    hr_bpm    115   83.00  112.00   
6       ID_15  before        hypoxia        pi    115    1.11    2.03   
7       ID_15  before        hypoxia       qtc    115  399.00  414.00   
8       ID_15  before  post_normoxia      spo2     45   99.00  100.00   
9       ID_15  before  post_normoxia    hr_bpm     45   65.00  100.00   
10      ID_15  before  post_normoxia        pi     45    0.84    2.07   
11      ID_15  before  post_normoxia       qtc     45  405.00  419.00   
12      ID_15  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_18  before   pre_normoxia      spo2     18   98.00   99.00   
1       ID_18  before   pre_normoxia    hr_bpm     18   81.00   98.00   
2       ID_18  before   pre_normoxia        pi     18    2.66    3.58   
3       ID_18  before   pre_normoxia       qtc     18  423.00  443.00   
4       ID_18  before        hypoxia      spo2     94   85.00   95.00   
5       ID_18  before        hypoxia    hr_bpm     94   82.00  105.00   
6       ID_18  before        hypoxia        pi     94    2.35    4.14   
7       ID_18  before        hypoxia       qtc     94  433.00  440.00   
8       ID_18  before  post_normoxia      spo2     16   98.00  100.00   
9       ID_18  before  post_normoxia    hr_bpm     16   73.00   85.00   
10      ID_18  before  post_normoxia        pi     16    3.08    6.13   
11      ID_18  before  post_normoxia       qtc     16  436.00  438.00   
12      ID_18  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_19  before   pre_normoxia      spo2     69   96.00  100.00   
1       ID_19  before   pre_normoxia    hr_bpm     90   66.00   87.00   
2       ID_19  before   pre_normoxia        pi     69    5.09    8.50   
3       ID_19  before   pre_normoxia       qtc     74  395.00  413.00   
4       ID_19  before        hypoxia      spo2    138   85.00   98.00   
5       ID_19  before        hypoxia    hr_bpm    138   74.00  106.00   
6       ID_19  before        hypoxia        pi    138    2.65    7.24   
7       ID_19  before        hypoxia       qtc    138  404.00  411.00   
8       ID_19  before  post_normoxia      spo2     24   91.00  100.00   
9       ID_19  before  post_normoxia    hr_bpm     24   70.00   87.00   
10      ID_19  before  post_normoxia        pi     24    2.95    4.96   
11      ID_19  before  post_normoxia       qtc     24  405.00  406.00   
12      ID_19  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_23  before   pre_normoxia      spo2    126   97.00   99.00   
1       ID_23  before   pre_normoxia    hr_bpm    126   84.00  105.00   
2       ID_23  before   pre_normoxia        pi    126    1.38    9.25   
3       ID_23  before   pre_normoxia       qtc    126  421.00  430.00   
4       ID_23  before        hypoxia      spo2    149   92.00   96.00   
5       ID_23  before        hypoxia    hr_bpm    149   83.00  108.00   
6       ID_23  before        hypoxia        pi    149    2.09    4.93   
7       ID_23  before        hypoxia       qtc    149  406.00  430.00   
8       ID_23  before  post_normoxia      spo2    244   97.00  100.00   
9       ID_23  before  post_normoxia    hr_bpm    248   76.00  101.00   
10      ID_23  before  post_normoxia        pi    244    1.28    5.74   
11      ID_23  before  post_normoxia       qtc    248  419.00  434.00   
12      ID_23  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_25  before   pre_normoxia      spo2    214   93.00  100.00   
1       ID_25  before   pre_normoxia    hr_bpm    216   59.00   91.00   
2       ID_25  before   pre_normoxia        pi    215    0.99   13.20   
3       ID_25  before   pre_normoxia       qtc    202  412.00  425.00   
4       ID_25  before        hypoxia      spo2    121   83.00   97.00   
5       ID_25  before        hypoxia    hr_bpm    121   73.00  113.00   
6       ID_25  before        hypoxia        pi    121    0.61    3.60   
7       ID_25  before        hypoxia       qtc    121  415.00  424.00   
8       ID_25  before  post_normoxia      spo2     58   99.00  100.00   
9       ID_25  before  post_normoxia    hr_bpm     66   61.00   89.00   
10      ID_25  before  post_normoxia        pi     60    0.93   14.10   
11      ID_25  before  post_normoxia       qtc     66  420.00  426.00   
12      ID_25  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_27  before   pre_normoxia      spo2      0     NaN     NaN   
1       ID_27  before   pre_normoxia    hr_bpm      0     NaN     NaN   
2       ID_27  before   pre_normoxia        pi      0     NaN     NaN   
3       ID_27  before   pre_normoxia       qtc      0     NaN     NaN   
4       ID_27  before        hypoxia      spo2    144   86.00   95.00   
5       ID_27  before        hypoxia    hr_bpm    144   72.00   95.00   
6       ID_27  before        hypoxia        pi    144    1.38    3.47   
7       ID_27  before        hypoxia       qtc    144  400.00  405.00   
8       ID_27  before  post_normoxia      spo2     91   95.00  100.00   
9       ID_27  before  post_normoxia    hr_bpm     91   56.00   90.00   
10      ID_27  before  post_normoxia        pi     91    1.45    5.36   
11      ID_27  before  post_normoxia       qtc     91  401.00  418.00   
12      ID_27  before          


Mean of empty slice


Mean of empty slice


Mean of empty slice

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_28  before   pre_normoxia      spo2      0     NaN     NaN   
1       ID_28  before   pre_normoxia    hr_bpm      5   74.00   77.00   
2       ID_28  before   pre_normoxia        pi      0     NaN     NaN   
3       ID_28  before   pre_normoxia       qtc      0     NaN     NaN   
4       ID_28  before        hypoxia      spo2    124   89.00   99.00   
5       ID_28  before        hypoxia    hr_bpm    124   70.00  102.00   
6       ID_28  before        hypoxia        pi    124    0.97    2.43   
7       ID_28  before        hypoxia       qtc    124  394.00  408.00   
8       ID_28  before  post_normoxia      spo2     74  100.00  100.00   
9       ID_28  before  post_normoxia    hr_bpm     74   72.00   85.00   
10      ID_28  before  post_normoxia        pi     74    0.91    3.28   
11      ID_28  before  post_normoxia  

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_30  before   pre_normoxia      spo2     75   99.00  100.00   
1       ID_30  before   pre_normoxia    hr_bpm     93   79.00  114.00   
2       ID_30  before   pre_normoxia        pi     76    0.44    1.45   
3       ID_30  before   pre_normoxia       qtc     79  425.00  452.00   
4       ID_30  before        hypoxia      spo2    111   82.00   95.00   
5       ID_30  before        hypoxia    hr_bpm    111   96.00  123.00   
6       ID_30  before        hypoxia        pi    111    0.39    3.73   
7       ID_30  before        hypoxia       qtc    111  431.00  437.00   
8       ID_30  before  post_normoxia      spo2     62   99.00  100.00   
9       ID_30  before  post_normoxia    hr_bpm     62   85.00  111.00   
10      ID_30  before  post_normoxia        pi     62    0.60    5.38   
11      ID_30  before  post_normoxia       qtc     62  436.00  441.00   
12      ID_30  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_31  before   pre_normoxia      spo2     25   97.00  100.00   
1       ID_31  before   pre_normoxia    hr_bpm     37    0.00   89.00   
2       ID_31  before   pre_normoxia        pi     26    2.07    4.27   
3       ID_31  before   pre_normoxia       qtc     20  402.00  407.00   
4       ID_31  before        hypoxia      spo2    100   85.00   95.00   
5       ID_31  before        hypoxia    hr_bpm    100   73.00   98.00   
6       ID_31  before        hypoxia        pi    100    1.90    2.92   
7       ID_31  before        hypoxia       qtc    100  418.00  424.00   
8       ID_31  before  post_normoxia      spo2     19   99.00  100.00   
9       ID_31  before  post_normoxia    hr_bpm     19   62.00   83.00   
10      ID_31  before  post_normoxia        pi     19    1.34    2.85   
11      ID_31  before  post_normoxia       qtc     19  431.00  438.00   
12      ID_31  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_32  before   pre_normoxia      spo2     31   98.00   99.00   
1       ID_32  before   pre_normoxia    hr_bpm     31   77.00   93.00   
2       ID_32  before   pre_normoxia        pi     31   12.70   16.40   
3       ID_32  before   pre_normoxia       qtc     31  402.00  407.00   
4       ID_32  before        hypoxia      spo2    132   92.00   98.00   
5       ID_32  before        hypoxia    hr_bpm    132  104.00  125.00   
6       ID_32  before        hypoxia        pi    132    2.51    6.90   
7       ID_32  before        hypoxia       qtc    132  393.00  406.00   
8       ID_32  before  post_normoxia      spo2     29   98.00  100.00   
9       ID_32  before  post_normoxia    hr_bpm     29   97.00  128.00   
10      ID_32  before  post_normoxia        pi     29    3.20    7.65   
11      ID_32  before  post_normoxia       qtc     29  403.00  406.00   
12      ID_32  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_33  before   pre_normoxia      spo2      5   99.00   99.00   
1       ID_33  before   pre_normoxia    hr_bpm     10   81.00   91.00   
2       ID_33  before   pre_normoxia        pi      5    2.03    2.76   
3       ID_33  before   pre_normoxia       qtc      5  399.00  399.00   
4       ID_33  before        hypoxia      spo2    125   89.00   98.00   
5       ID_33  before        hypoxia    hr_bpm    125   87.00  138.00   
6       ID_33  before        hypoxia        pi    125    0.84    2.21   
7       ID_33  before        hypoxia       qtc    125  415.00  453.00   
8       ID_33  before  post_normoxia      spo2      7   99.00  100.00   
9       ID_33  before  post_normoxia    hr_bpm      7   89.00  101.00   
10      ID_33  before  post_normoxia        pi      7    0.86    1.53   
11      ID_33  before  post_normoxia       qtc      7  434.00  434.00   
12      ID_33  before          


Mean of empty slice


Mean of empty slice

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_34  before   pre_normoxia      spo2    134   99.00  100.00   
1       ID_34  before   pre_normoxia    hr_bpm    140   74.00   99.00   
2       ID_34  before   pre_normoxia        pi    134    0.53    1.34   
3       ID_34  before   pre_normoxia       qtc    117  405.00  415.00   
4       ID_34  before        hypoxia      spo2     74   82.00   93.00   
5       ID_34  before        hypoxia    hr_bpm     47   81.00  142.00   
6       ID_34  before        hypoxia        pi     74    0.78    2.75   
7       ID_34  before        hypoxia       qtc      0     NaN     NaN   
8       ID_34  before  post_normoxia      spo2    142   95.00  100.00   
9       ID_34  before  post_normoxia    hr_bpm    142   70.00   93.00   
10      ID_34  before  post_normoxia        pi    142    0.82    5.51   
11      ID_34  before  post_normoxia       qtc     40  400.0

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_35  before   pre_normoxia      spo2     31   97.00   99.00   
1       ID_35  before   pre_normoxia    hr_bpm     31   72.00   84.00   
2       ID_35  before   pre_normoxia        pi     31    3.87    9.40   
3       ID_35  before   pre_normoxia       qtc     31  425.00  432.00   
4       ID_35  before        hypoxia      spo2    122   87.00   96.00   
5       ID_35  before        hypoxia    hr_bpm    122   69.00   97.00   
6       ID_35  before        hypoxia        pi    122    2.03    7.75   
7       ID_35  before        hypoxia       qtc    122  429.00  434.00   
8       ID_35  before  post_normoxia      spo2    202  100.00  100.00   
9       ID_35  before  post_normoxia    hr_bpm    202   68.00   97.00   
10      ID_35  before  post_normoxia        pi    202    2.29   11.20   
11      ID_35  before  post_normoxia       qtc    202  434.00  442.00   
12      ID_35  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_37  before   pre_normoxia      spo2     89   96.00  100.00   
1       ID_37  before   pre_normoxia    hr_bpm     89   71.00   83.00   
2       ID_37  before   pre_normoxia        pi     89    1.69    8.41   
3       ID_37  before   pre_normoxia       qtc     89  382.00  387.00   
4       ID_37  before        hypoxia      spo2    106   87.00   95.00   
5       ID_37  before        hypoxia    hr_bpm    106   74.00   92.00   
6       ID_37  before        hypoxia        pi    106    0.80    4.18   
7       ID_37  before        hypoxia       qtc    106  382.00  384.00   
8       ID_37  before  post_normoxia      spo2     37   98.00  100.00   
9       ID_37  before  post_normoxia    hr_bpm     43   72.00   93.00   
10      ID_37  before  post_normoxia        pi     37    0.81    3.59   
11      ID_37  before  post_normoxia       qtc     43  385.00  387.00   
12      ID_37  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_38  before   pre_normoxia      spo2    145   95.00  100.00   
1       ID_38  before   pre_normoxia    hr_bpm    150   61.00   85.00   
2       ID_38  before   pre_normoxia        pi    147    1.50   20.00   
3       ID_38  before   pre_normoxia       qtc    136  447.00  464.00   
4       ID_38  before        hypoxia      spo2    126   87.00   96.00   
5       ID_38  before        hypoxia    hr_bpm    126   70.00   91.00   
6       ID_38  before        hypoxia        pi    126    1.43    5.51   
7       ID_38  before        hypoxia       qtc    126  443.00  469.00   
8       ID_38  before  post_normoxia      spo2     53   99.00  100.00   
9       ID_38  before  post_normoxia    hr_bpm     53   67.00   77.00   
10      ID_38  before  post_normoxia        pi     53    1.22    4.00   
11      ID_38  before  post_normoxia       qtc     53  444.00  446.00   
12      ID_38  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_39  before   pre_normoxia      spo2     16   97.00   99.00   
1       ID_39  before   pre_normoxia    hr_bpm     29   74.00   84.00   
2       ID_39  before   pre_normoxia        pi     16    4.95    8.41   
3       ID_39  before   pre_normoxia       qtc     15  406.00  413.00   
4       ID_39  before        hypoxia      spo2    100   88.00   95.00   
5       ID_39  before        hypoxia    hr_bpm    102   88.00  105.00   
6       ID_39  before        hypoxia        pi    100    1.00    4.25   
7       ID_39  before        hypoxia       qtc    102  407.00  412.00   
8       ID_39  before  post_normoxia      spo2     37   99.00  100.00   
9       ID_39  before  post_normoxia    hr_bpm     37   78.00   89.00   
10      ID_39  before  post_normoxia        pi     37    1.23    2.10   
11      ID_39  before  post_normoxia       qtc     37  407.00  414.00   
12      ID_39  before          

INFO:root:stats_df:
   subject_id  status          phase parameter  count    min    max  \
0       ID_40  before   pre_normoxia      spo2    159  96.00   99.0   
1       ID_40  before   pre_normoxia    hr_bpm    159  68.00   97.0   
2       ID_40  before   pre_normoxia        pi    159  13.00   20.0   
3       ID_40  before        hypoxia      spo2    123  88.00   94.0   
4       ID_40  before        hypoxia    hr_bpm    123  74.00  107.0   
5       ID_40  before        hypoxia        pi    123   6.50   16.8   
6       ID_40  before  post_normoxia      spo2     25  88.00  100.0   
7       ID_40  before  post_normoxia    hr_bpm     25  63.00   73.0   
8       ID_40  before  post_normoxia        pi     25   7.46   18.3   
9       ID_40  before           drop      spo2     50  92.00   99.0   
10      ID_40  before           drop    hr_bpm     50  68.00  102.0   
11      ID_40  before           drop        pi     50   8.84   20.0   
12      ID_40  before       recovery      spo2      5  90

INFO:root:stats_df:
   subject_id  status          phase parameter  count     min     max  \
0       ID_42  before   pre_normoxia      spo2     15   97.00   99.00   
1       ID_42  before   pre_normoxia    hr_bpm     17   84.00   96.00   
2       ID_42  before   pre_normoxia        pi     15    0.42    0.46   
3       ID_42  before   pre_normoxia       qtc     11  404.00  404.00   
4       ID_42  before        hypoxia      spo2    105   89.00   97.00   
5       ID_42  before        hypoxia    hr_bpm    105  102.00  129.00   
6       ID_42  before        hypoxia        pi    104    0.22    5.72   
7       ID_42  before        hypoxia       qtc    105  402.00  415.00   
8       ID_42  before  post_normoxia      spo2     18   99.00  100.00   
9       ID_42  before  post_normoxia    hr_bpm     18   94.00  105.00   
10      ID_42  before  post_normoxia        pi     18    0.57    1.37   
11      ID_42  before  post_normoxia       qtc     18  412.00  412.00   
12      ID_42  before          

### Save results

In [None]:
segmenter_main_output_df.to_excel('segmenter_v6_2_output_v3.xlsx', index=False)

In [None]:
segmenter_main_output_df

Unnamed: 0,subject_id,status,phase,parameter,count,min,max,mean,median,std,slope
0,ID_01,after,pre_normoxia,spo2,136,95.00,100.00,99.272059,99.00,0.793183,
1,ID_01,after,pre_normoxia,hr_bpm,146,76.00,103.00,89.602740,90.00,4.737931,
2,ID_01,after,pre_normoxia,pi,136,1.01,6.83,3.836103,3.32,1.332032,
3,ID_01,after,pre_normoxia,qtc,132,384.00,401.00,395.363636,398.00,4.713587,
4,ID_01,after,hypoxia,spo2,112,93.00,97.00,95.133929,95.00,0.963248,-0.008153
...,...,...,...,...,...,...,...,...,...,...,...
2004,ID_42,before,drop,pi,31,0.30,0.76,0.465806,0.35,0.172158,0.010109
2006,ID_42,before,recovery,spo2,24,89.00,99.00,93.541667,92.00,3.322900,0.409130
2007,ID_42,before,recovery,hr_bpm,24,93.00,113.00,103.208333,104.00,4.671646,-0.513478
2008,ID_42,before,recovery,pi,23,0.45,3.55,1.810435,1.68,1.043671,


In [None]:
display(hrv_results)

Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_SDANN1,HRV_SDNNI1,HRV_SDANN2,HRV_SDNNI2,HRV_SDANN5,HRV_SDNNI5,HRV_RMSSD,HRV_SDSD,...,HRV_LZC,subject_id,status,sample_rate,hypoxic_total_seconds,prehypoxic_total_seconds,posthypoxic_total_seconds,premask length (sec),postmask length (sec),phase
0,669.989525,53.145115,22.349552,48.056272,15.985918,50.872473,,,25.229961,25.244065,...,0.569719,ID_01,after,256,800,600,600,600,600,PRE-HYPOX
1,657.886171,45.729725,28.949547,35.899820,29.025337,37.357518,,,21.242996,21.254164,...,0.582211,ID_01,after,256,800,600,600,600,600,HYPOX
2,778.906250,52.933151,26.502046,46.059430,22.040106,48.870928,,,28.004821,28.022804,...,0.684908,ID_01,after,256,800,600,600,600,600,POST-HYPOX
3,790.567282,49.013735,14.743564,45.166221,6.037413,47.872374,,,25.474208,25.490842,...,0.668867,ID_03,after,500,860,600,600,600,600,PRE-HYPOX
4,663.592920,41.318392,24.654376,32.559176,24.418563,33.898105,,,17.215718,17.222995,...,0.477973,ID_03,after,500,860,600,600,600,600,HYPOX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,673.117845,258.529251,30.837169,257.717610,20.773642,258.249492,,,401.152861,401.378410,...,0.736871,ID_40,before,500,855,600,310,600,310,HYPOX
167,718.997669,241.900714,21.712594,242.779329,9.334315,242.200296,,,287.561452,287.897934,...,0.794985,ID_40,before,500,855,600,310,600,310,POST-HYPOX
336,564.121117,125.689621,116.137366,49.352656,120.552369,62.890823,,,56.051018,56.082521,...,0.171215,ID_42,before,256,765,481,385,481,385,PRE-HYPOX
337,534.961286,41.261953,12.653549,36.473989,12.058787,37.820001,,,32.162707,32.177035,...,0.614052,ID_42,before,256,765,481,385,481,385,HYPOX


In [None]:
hrv_results.to_excel('segmenter_6_2_hrv_results_v8.xlsx', index=False)

In [None]:
## adaptív thresholdot simított SpO2-ből nézze a minimumot
## ID 11 before
## nk.event_finder debug

## Folder reorder and naming

In [None]:
# Define the source and destination directories
src_directory = r"Data\batch_3"
dest_directory = r"Data\batch_3_reID_2"
log_file = r"Data\batch_3_reID_2/log_file.xlsx"  # Path to save the Excel log file

# Run the function
rename_and_copy_folders(src_directory, dest_directory, log_file)

# Statistics [HRV]

## Paired T tests

### Paired t test, after vs before (during hypoxia)

In [None]:
import pandas as pd
import scipy.stats as stats
import logging

def perform_paired_t_tests_hypoxia_covid(hrv_df):
    # Ensure the 'before' and 'after' rows are paired by 'filename'
    before_df = hrv_df[hrv_df['status'] == 'before']
    after_df = hrv_df[hrv_df['status'] == 'after']

    # Merge on the 'filename' to ensure pairing between before and after
    paired_df = pd.merge(before_df, after_df, on='subject_id', suffixes=('_before', '_after'))

    # List of HRV columns to test (excluding non-HRV columns like 'filename', 'status', etc.)
    hrv_columns = [col for col in hrv_df.columns if col.startswith('HRV')]

    results = []

    # Perform paired t-test for each HRV metric
    for column in hrv_columns:
        before_values = paired_df[f'{column}_before']
        after_values = paired_df[f'{column}_after']
        
        # Perform paired t-test
        t_stat, p_value = stats.ttest_rel(before_values, after_values)
        
        # Store the result
        results.append({
            'HRV_Metric': column,
            't_statistic': t_stat,
            'p_value': p_value,
            'before mean': before_values.mean(),
            'after mean': after_values.mean(),
            'before sd': before_values.std(),
            'after sd': after_values.std(),
        })

    # Convert results to DataFrame
    results_df = pd.DataFrame(results)
    
    return results_df

In [None]:
hrv_df = pd.read_excel('segmenter_6_2_hrv_results_v7.xlsx')

In [None]:
# Example usage

t_test_results = perform_paired_t_tests_hypoxia_covid(hrv_results)
print(t_test_results)

In [45]:
t_test_results.to_excel('HRV_paired_t_test_results_v2.xlsx')

### Paired t test, normoxia (pre) vs hypoxia (no before/after assessed)

In [109]:
def paired_t_tests_pre_and_hypoxia(hrv_df):
    # Ensure the 'before' and 'after' rows are paired by 'filename'
    pre_ph_df = hrv_df[hrv_df['phase'] == 'PRE-HYPOX']
    hypox_df = hrv_df[hrv_df['phase'] == 'HYPOX']

    # Merge on the 'filename' to ensure pairing between before and hypox
    paired_df = pd.merge(pre_ph_df, hypox_df, on='subject_id', suffixes=('_before', '_after'))

    # List of HRV columns to test (excluding non-HRV columns like 'filename', 'status', etc.)
    hrv_columns = [col for col in hrv_df.columns if col.startswith('HRV')]

    results = []

    # Perform paired t-test for each HRV metric
    for column in hrv_columns:
        pre_values = paired_df[f'{column}_before']
        hypoxia_values = paired_df[f'{column}_after']
        
        # Perform paired t-test
        t_stat, p_value = stats.ttest_rel(pre_values, hypoxia_values)
        
        # Store the result
        results.append({
            'HRV_Metric': column,
            't_statistic': t_stat,
            'p_value': p_value,
            'before mean': pre_values.mean(),
            'after mean': hypoxia_values.mean(),
            'before sd': pre_values.std(),
            'after sd': hypoxia_values.std(),
        })

    # Convert results to DataFrame
    results_df = pd.DataFrame(results)
    
    return results_df

In [111]:
hrv_df = pd.read_excel('segmenter_6_2_hrv_results_v7.xlsx')

In [112]:
pre_vs_hypoxia_ttest = paired_t_tests_pre_and_hypoxia(hrv_df)

In [113]:
pre_vs_hypoxia_ttest.to_excel('HRV_paired_t_test_normoxia_vs_hypoxia_v1.xlsx')

## Normality and sphericity tests (for repeated measure ANOVA)

### Normality test

In [121]:
hrv_df = pd.read_excel('segmenter_6_2_hrv_results_v8.xlsx')

In [150]:
hrv_df = hrv_df[hrv_df['status'] != 'POST-HYPOX']

In [147]:
import pandas as pd
import numpy as np
from scipy import stats
import pingouin as pg

# Function to check normality using Shapiro-Wilk test
def check_normality(df, columns):
    passed_columns = {}  # To store columns that passed Normality test (p-value >= 0.05)
    failed_columns = {}  # To store columns that failed Normality test (p-value < 0.05)

    for col in columns:
        # Perform Shapiro-Wilk test for normality, dropping NaN values
        stat, p_value = stats.shapiro(df[col].dropna())
        
        # Store the results in a dictionary for each column
        result = {"statistic": stat, "p_value": p_value}
        
        if p_value < 0.05:
            failed_columns[col] = result  # Add to failed columns if p-value < 0.05
        else:
            passed_columns[col] = result  # Add to passed columns if p-value >= 0.05

    return passed_columns, failed_columns

# Function to check sphericity using Pingouin's sphericity test
def check_sphericity(df, columns, subject_column, condition_column):
    passed_columns = {}  # To store columns that passed Mauchly's test (p-value >= 0.05)
    failed_columns = {}  # To store columns that failed Mauchly's test (p-value < 0.05)
    
    for col in columns:
        # Skip columns with missing values or constant values
        if df[col].isnull().all() or df[col].nunique() == 1:
            print(f"Skipping column {col} due to empty or constant values.")
            continue
        
        # Check sample size: if less than 2 unique values per group, skip Mauchly's test
        group_sizes = df.groupby(condition_column)[col].nunique()
        if any(group_sizes < 2):  # If any condition has less than 2 unique values
            print(f"Skipping column {col} due to insufficient sample size.")
            continue
        
        # Perform Mauchly's test for sphericity
        results = pg.sphericity(data=df, dv=col, within=condition_column, subject=subject_column)
        
        # Extract the p-value from the results
        p_value = results.pval[0]  # Access the p-value for the sphericity test
        
        # Store the results in the appropriate dictionary
        if p_value < 0.05:
            failed_columns[col] = results  # Add to failed columns if p-value < 0.05
        else:
            passed_columns[col] = results  # Add to passed columns if p-value >= 0.05
    
    return passed_columns, failed_columns


In [151]:
# List of HRV parameters columns to check (those starting with HRV_)
hrv_columns = [col for col in hrv_df.columns if col.startswith('HRV_')]

In [None]:
# Check normality for each HRV parameter
passed_normality, failed_normality = check_normality(hrv_df, hrv_columns)

print("Normality Results (Shapiro-Wilk Test):")
print("\nPassed Normality Test:")
for col, result in passed_normality.items():
    print(f"{col}: statistic = {result['statistic']}, p-value = {result['p_value']}")

print("\nFailed Normality Test (p-value < 0.05):")
for col, result in failed_normality.items():
    print(f"{col}: statistic = {result['statistic']}, p-value = {result['p_value']}")

# Check sphericity for each HRV parameter
passed_sphericity, failed_sphericity = check_sphericity(hrv_df, hrv_columns, subject_column='subject_id', condition_column='phase')

print("\nSphericity Results (Mauchly's Test):")
print("\nPassed Mauchly's Test:")
for col, result in passed_sphericity.items():
    print(f"{col}: Mauchly's p-value = {result.pval[0]}")

print("\nFailed Mauchly's Test (p-value < 0.05):")
for col, result in failed_sphericity.items():
    print(f"{col}: Mauchly's p-value = {result.pval[0]}")

## Friedmantest

In [162]:
import pandas as pd

def remove_incomplete_pairs_empty_columns_and_posthypoxia(df):
    """
    Remove rows with 'POST-HYPOX' phase, subjects who do not have all necessary pairs
    for the Friedman test, and columns with all NaN values.
    The necessary pairs are:
    1. 'before_PRE-HYPOX', 'after_PRE-HYPOX', 'before_HYPOX', 'after_HYPOX'
    """
    # Step 1: Remove rows with 'POST-HYPOX' phase
    df_cleaned = df[df['phase'] != 'POST-HYPOX']
    
    # Step 2: Remove columns with all NaN values
    df_cleaned = df_cleaned.dropna(axis=1, how='all')
    
    # Step 3: Define the required pairs: (status, phase)
    required_pairs = [
        ('before', 'PRE-HYPOX'),
        ('after', 'PRE-HYPOX'),
        ('before', 'HYPOX'),
        ('after', 'HYPOX')
    ]
    
    # Step 4: Identify subjects with missing pairs
    valid_subjects = []
    
    # Group by subject_id and check if they have all the required pairs
    grouped = df_cleaned.groupby('subject_id')

    for subject_id, group in grouped:
        # Check for the presence of all required pairs for the current subject
        pairs_present = []
        for status, phase in required_pairs:
            condition = (group['status'] == status) & (group['phase'] == phase)
            if group[condition].shape[0] > 0:  # If the subject has at least one measurement for this pair
                pairs_present.append((status, phase))
        
        # If all required pairs are present, keep the subject
        if len(pairs_present) == len(required_pairs):
            valid_subjects.append(subject_id)
    
    # Step 5: Filter the dataframe to only keep valid subjects
    cleaned_df = df_cleaned[df_cleaned['subject_id'].isin(valid_subjects)]
    
    return cleaned_df


In [161]:
# Example usage:
# Assuming hrv_df is your DataFrame:
cleaned_df = remove_incomplete_pairs_empty_columns_and_posthypoxia(hrv_df)
print(cleaned_df)

     HRV_MeanNN    HRV_SDNN  HRV_SDANN1  HRV_SDNNI1  HRV_SDANN2  HRV_SDNNI2  \
0    669.989525   53.145115   22.349552   48.056272   15.985918   50.872473   
1    657.886171   45.729725   28.949547   35.899820   29.025337   37.357518   
3    790.567282   49.013735   14.743564   45.166221    6.037413   47.872374   
4    663.592920   41.318392   24.654376   32.559176   24.418563   33.898105   
6    685.056364   53.175394   37.945814   37.120190   25.464632   45.943596   
..          ...         ...         ...         ...         ...         ...   
163  619.011759   30.914476   12.123095   28.227454    8.151640   29.857261   
165  714.922527  245.758953   27.985027  245.828808   19.010849  246.174103   
166  673.117845  258.529251   30.837169  257.717610   20.773642  258.249492   
168  564.121117  125.689621  116.137366   49.352656  120.552369   62.890823   
169  534.961286   41.261953   12.653549   36.473989   12.058787   37.820001   

      HRV_RMSSD    HRV_SDSD  HRV_CVNN  HRV_CVSD  ..

In [163]:
hrv_columns = [col for col in cleaned_df.columns if col.startswith('HRV_')]

for hrv_col in hrv_columns:
    reshaped_df = cleaned_df.pivot_table(index='subject_id', columns=['status', 'phase'], values=hrv_col)
    
    # Friedman test for the current HRV parameter
    stat, p_value = stats.friedmanchisquare(
        reshaped_df[('before', 'PRE-HYPOX')],
        reshaped_df[('after', 'PRE-HYPOX')],
        reshaped_df[('before', 'HYPOX')],
        reshaped_df[('after', 'HYPOX')]
    )
    
    # Print the results for each HRV parameter
    print(f"{hrv_col} - Friedman Test statistic: {stat}, p-value: {p_value}")
    if p_value < 0.05:
        print("Significant difference!")
    else:
        print("No significant difference.")


HRV_MeanNN - Friedman Test statistic: 24.96923076923082, p-value: 1.5670921741338807e-05
Significant difference!
HRV_SDNN - Friedman Test statistic: 10.753846153846155, p-value: 0.013134182290360117
Significant difference!
HRV_SDANN1 - Friedman Test statistic: nan, p-value: nan
No significant difference.
HRV_SDNNI1 - Friedman Test statistic: nan, p-value: nan
No significant difference.
HRV_SDANN2 - Friedman Test statistic: nan, p-value: nan
No significant difference.
HRV_SDNNI2 - Friedman Test statistic: nan, p-value: nan
No significant difference.
HRV_RMSSD - Friedman Test statistic: 5.215384615384664, p-value: 0.15668815404052713
No significant difference.
HRV_SDSD - Friedman Test statistic: 5.215384615384664, p-value: 0.15668815404052713
No significant difference.
HRV_CVNN - Friedman Test statistic: 5.584615384615404, p-value: 0.1336643681905119
No significant difference.
HRV_CVSD - Friedman Test statistic: 4.015384615384619, p-value: 0.25980765437148656
No significant difference.
H

### Friedman posthoc test

In [None]:
import pandas as pd
import scipy.stats as stats

# Assuming cleaned_df is already processed and you have the reshaped data for the Friedman test
hrv_columns = [col for col in cleaned_df.columns if col.startswith('HRV_')]

# Initialize an empty list to store the results
results_list = []

# Perform Friedman Test and Pairwise Wilcoxon Signed-Rank Test for each HRV parameter
for hrv_col in hrv_columns:
    reshaped_df = cleaned_df.pivot_table(index='subject_id', columns=['status', 'phase'], values=hrv_col)
    
    # Flatten the MultiIndex columns by converting them to a single-level column
    reshaped_df.columns = ['_'.join(col).strip() for col in reshaped_df.columns.values]
    
    # Friedman test for the current HRV parameter
    stat, p_value = stats.friedmanchisquare(
        reshaped_df['before_PRE-HYPOX'],
        reshaped_df['after_PRE-HYPOX'],
        reshaped_df['before_HYPOX'],
        reshaped_df['after_HYPOX']
    )
    
    # Store Friedman test results
    result = {
        'HRV_Parameter': hrv_col,
        'Friedman_Test_Statistic': stat,
        'Friedman_Test_p-value': p_value,
        'Significant_Difference': 'Yes' if p_value < 0.05 else 'No'
    }
    
    # Perform Pairwise Wilcoxon Signed-Rank Test if Friedman test is significant
    if p_value < 0.05:
        # Pairwise comparisons: create a list of column pairs to compare
        comparisons = [
            ('before_PRE-HYPOX', 'after_PRE-HYPOX'),
            ('before_PRE-HYPOX', 'before_HYPOX'),
            ('before_PRE-HYPOX', 'after_HYPOX'),
            ('after_PRE-HYPOX', 'before_HYPOX'),
            ('after_PRE-HYPOX', 'after_HYPOX'),
            ('before_HYPOX', 'after_HYPOX')
        ]
        
        # Perform Wilcoxon test for each pair
        for comp1, comp2 in comparisons:
            # Perform the Wilcoxon Signed-Rank test
            stat, p_val = stats.wilcoxon(reshaped_df[comp1], reshaped_df[comp2])
            
            # Store Wilcoxon results
            result[f'Wilcoxon_{comp1}_vs_{comp2}_Statistic'] = stat
            result[f'Wilcoxon_{comp1}_vs_{comp2}_p-value'] = p_val
            if p_val < 0.05:
                result[f'Wilcoxon_{comp1}_vs_{comp2}_Significant'] = 'Yes'
            else:
                result[f'Wilcoxon_{comp1}_vs_{comp2}_Significant'] = 'No'
    
    # Append the result for the current HRV parameter to the results list
    results_list.append(result)

# Convert the results list to a DataFrame for easy reading
final_results_df = pd.DataFrame(results_list)

# Display the results DataFrame
print(final_results_df)



   HRV_Parameter  Friedman_Test_Statistic  Friedman_Test_p-value  \
0     HRV_MeanNN                24.969231               0.000016   
1       HRV_SDNN                10.753846               0.013134   
2     HRV_SDANN1                      NaN                    NaN   
3     HRV_SDNNI1                      NaN                    NaN   
4     HRV_SDANN2                      NaN                    NaN   
..           ...                      ...                    ...   
83    HRV_RCMSEn                 0.046154               0.997399   
84        HRV_CD                 0.600000               0.896432   
85       HRV_HFD                 2.907692               0.406077   
86       HRV_KFD                 3.092308               0.377611   
87       HRV_LZC                 8.538462               0.036100   

   Significant_Difference  \
0                     Yes   
1                     Yes   
2                      No   
3                      No   
4                      No   
..       

In [174]:
final_results_df.to_excel('PostHocWilcoxonTest_v1.xlsx')