In [None]:
import datetime
import math
import os.path
import statistics as stat
from os import listdir, path
from os.path import isfile, join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from IPython.display import display
from plotly.offline import init_notebook_mode, iplot
from tqdm.notebook import tqdm

from pyapnea.oscar.oscar_constants import CHANNELS, ChannelID
from pyapnea.oscar.oscar_getter import event_data_to_dataframe, get_channel_from_code
from pyapnea.oscar.oscar_loader import load_session

In [None]:
# Notebook-wide setup: switch plotly's offline helpers to notebook mode.
init_notebook_mode(connected=True)
# Debugging aid, disabled by default: uncomment to remove pandas' limit on displayed rows.
#pd.set_option('display.max_rows', None)

In [None]:
def _list_event_files(directory):
    """Return one descriptor dict per regular file found directly in ``directory``.

    Each dict has the keys ``label`` and ``value`` (both the bare file name) and
    ``fullpath`` (``directory`` joined with the file name). Sub-directories are
    skipped. Names are sorted so the result does not depend on the arbitrary
    order in which ``listdir`` returns entries.
    """
    return [{'label': name, 'value': name, 'fullpath': join(directory, name)}
            for name in sorted(listdir(directory))
            if isfile(join(directory, name))]


data_path_cpap1 = '../data/raw/ResMed_23192565579/Events'
data_path_cpap2 = '../data/raw/ResMed_23221085377/Events'

# Files of both event directories, first directory first.
list_files = []
for events_dir in (data_path_cpap1, data_path_cpap2):
    list_files.extend(_list_event_files(events_dir))


In [None]:
def event_data_to_dataframe_all_channel(oscar_session_data):
    """Build one DataFrame holding the FlowRate, Obstructive and Pressure channels of a session.

    For every channel in ``oscar_session_data.data.channels`` whose ``code`` is
    one of the three channel ids above, the first entry of ``channel.events`` is
    turned into a frame (raw samples multiplied by that event's ``gain``) and the
    per-channel frames are outer-merged on their absolute timestamp.

    The session object is only read; it is not modified.

    Parameters
    ----------
    oscar_session_data
        Session object exposing ``data.channels``; each channel must provide
        ``code`` and ``events``, and the first event must provide ``gain``,
        ``t8``, ``rate``, ``evcount``, ``time``, ``data``, ``second_field``,
        ``data2`` and ``ts1``.

    Returns
    -------
    pandas.DataFrame
        Columns: ``time`` (offset added to ``ts1``; kept from the first channel
        merged, so rows that only exist in a later channel have NaN there), one
        value column per channel found, ``time_absolute`` (tz-aware, UTC) and
        ``local_time`` (the same instants in ``America/Montreal``). Rows are in
        chronological order with a fresh 0..n-1 index. If the session holds none
        of the three channels, the frame is empty and only has the two
        timestamp columns.
    """
    possible_channels = [ChannelID.CPAP_FlowRate.value, ChannelID.CPAP_Obstructive.value, ChannelID.CPAP_Pressure.value]
    global_df = None
    for channel in oscar_session_data.data.channels:
        # Skip channels we do not plot and channels without any event list.
        if channel.code not in possible_channels or len(channel.events) == 0:
            continue
        event = channel.events[0]
        y_col_name = [c[5] for c in CHANNELS if c[1].value == channel.code][0]

        if event.t8 == 0:
            # Time offsets are rebuilt from the sample count and the rate.
            # NOTE(review): presumably t8 == 0 marks fixed-rate data stored
            # without per-sample times - confirm against pyapnea.
            step = int(event.rate)
            relative_time = range(0, event.evcount * step, step)
        else:
            relative_time = event.time

        df = pd.DataFrame(data={'time': relative_time, y_col_name: event.data})
        df[y_col_name] = df[y_col_name] * event.gain

        if event.second_field:
            # not tested because no available file has a second field
            df[y_col_name + '2'] = event.data2
            df[y_col_name + '2'] = df[y_col_name + '2'] * event.gain

        # ts1 plus the offset is interpreted as epoch milliseconds.
        df['time_absolute'] = pd.to_datetime(df['time'] + event.ts1, unit='ms')

        if global_df is None or global_df.empty:
            global_df = df
        else:
            # Columns present on both sides (e.g. 'time') are kept from the left
            # frame only: the right-hand duplicates get the _DROP suffix and are
            # filtered out.
            global_df = pd.merge(global_df, df, on='time_absolute', how='outer', suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)')

    if global_df is None:
        # No matching channel at all: return an empty, correctly typed frame
        # instead of failing on the missing 'time_absolute' column.
        global_df = pd.DataFrame({'time_absolute': pd.DatetimeIndex([])})

    global_df['time_absolute'] = global_df['time_absolute'].dt.tz_localize('UTC')
    global_df['local_time'] = global_df['time_absolute'].dt.tz_convert('America/Montreal')
    # The index is a plain RangeIndex, so sorting by index would do nothing;
    # sort on the timestamp itself (stable, so equal timestamps keep their order).
    return global_df.sort_values('time_absolute', kind='stable', ignore_index=True)

# Displaying one session and the first event

In [None]:
# Load a single session file and convert its channels to one DataFrame.
# Original author's note: "number 2 has obstructive events".
filename_to_load = '../data/raw/ResMed_23192565579/Events/62202198.001'

oscar_session_data = load_session(filename_to_load)
df = event_data_to_dataframe_all_channel(oscar_session_data)

# TODO: handle the Leak channel, which has two values per timestamp.
# Disabled exploration code kept for that TODO:
#df_Leak = event_data_to_dataframe(oscar_session_data, ChannelID.CPAP_Leak.value)
#np.where(df.index.duplicated(keep=False) == True)

# Rich display of the merged per-session frame.
display(df)


In [None]:
# Reshape to the long format seaborn expects: one row per (local_time, column).
value_columns = ['FlowRate', 'Obstructive', 'Pressure']
dfc = df[value_columns + ['local_time']]
dfm = (
    dfc.melt('local_time', var_name='cols', value_name='vals')
       .sort_values(by=['local_time'], ignore_index=True)
)

# Rows of the Obstructive column that actually carry a value.
is_obstructive_value = dfm['vals'].notna() & dfm['cols'].eq('Obstructive')
dfm_annotation = dfm[is_obstructive_value]
display(dfm_annotation)

In [None]:
sns.set(rc={'figure.figsize':(25,15)})
fig, ax = plt.subplots()

# Left axis: every column except Pressure; right (twin) axis: Pressure.
is_pressure = dfm['cols'] == 'Pressure'
sns.lineplot(data=dfm[~is_pressure], x='local_time', y='vals', hue='cols', palette=['r', 'g'], ax=ax)
ax2 = ax.twinx()
# warning : should not interpolate between points...
sns.lineplot(data=dfm[is_pressure], x='local_time', y='vals', hue='cols', palette=['b'], ax=ax2)

# One vertical red line per annotated Obstructive timestamp.
for event_timestamp in dfm_annotation['local_time']:
    plt.axvline(x=event_timestamp, color='r', linewidth=3)

In [None]:
# Zoom on the first annotated event: from 30 s before it to 10 s after it.
event_time = dfm_annotation['local_time'].iloc[0]
window_start = event_time - datetime.timedelta(seconds=30)
window_end = event_time + datetime.timedelta(seconds=10)

sns.set(rc={'figure.figsize':(25,15)})
fig, ax = plt.subplots()
ax.set_ylim(-40, 50)
ax.set_xlim(window_start, window_end)

# Left axis: every column except Pressure; right (twin) axis: Pressure.
is_pressure = dfm['cols'] == 'Pressure'
sns.lineplot(data=dfm[~is_pressure], x='local_time', y='vals', hue='cols', palette=['r', 'g'], ax=ax)
ax2 = ax.twinx()
# warning : should not interpolate between points...
sns.lineplot(data=dfm[is_pressure], x='local_time', y='vals', hue='cols', palette=['b'], ax=ax2)

# One vertical red line per annotated Obstructive timestamp.
for event_timestamp in dfm_annotation['local_time']:
    plt.axvline(x=event_timestamp, color='r', linewidth=3)

# Statistics
- Number of sessions
- Length of sessions

In [None]:
# Channel codes of the event types counted in the statistics below.
events_channels = [
    channel_id.value
    for channel_id in (
        ChannelID.CPAP_ClearAirway,
        ChannelID.CPAP_Obstructive,
        ChannelID.CPAP_Hypopnea,
        ChannelID.CPAP_Apnea,  # ClearAirway or Obstructive, not determined
    )
]

# Name of each code: field 5 of the first CHANNELS entry whose id (field 1) matches.
event_names = []
for channel_code in events_channels:
    matching_names = [c[5] for c in CHANNELS if c[1].value == channel_code]
    event_names.append(matching_names[0])
event_names

In [None]:
# Per-session statistics plus one long table of events over all sessions.
stats = dict()
stats['nb_sessions'] = len(list_files)
stats['sessions'] = []

event_columns = ['time_absolute', 'type', 'session']
# One frame per (session, event type); concatenated once after the loop
# instead of growing a DataFrame inside it.
event_frames = []

# loading all files
for f in tqdm(list_files):
    oscar_session_data = load_session(f['fullpath'])
    # FlowRate
    # NOTE(review): events[0] is read before 'FlowRate present' is computed, so a
    # session without a FlowRate channel probably fails here first - confirm what
    # get_channel_from_code returns in that case.
    flowrate_chanel = get_channel_from_code(oscar_session_data, ChannelID.CPAP_FlowRate.value)
    flowrate_event = flowrate_chanel.events[0]

    stat_session = {'filename': f['label'],
                    'nb channel': len(oscar_session_data.data.channels),
                    'ts1' : flowrate_event.ts1,
                    'ts2' : flowrate_event.ts2,
                    # naive datetime in the local timezone of the machine running the notebook
                    'starting time' : datetime.datetime.fromtimestamp(flowrate_event.ts1/1000.0),
                    'lenght FlowRate (ms)': flowrate_event.ts2 - flowrate_event.ts1,
                    'FlowRate present': (ChannelID.CPAP_FlowRate.value in [c.code for c in oscar_session_data.data.channels]),
                    'events': []}

    # Events
    for e in events_channels:
        event_name = [c[5] for c in CHANNELS if c[1].value == e][0]
        event_channel_df = event_data_to_dataframe(oscar_session_data, e)
        if not event_channel_df.empty:
            # .assign() returns a new frame, so the filtered slice is never
            # written to (no SettingWithCopyWarning).
            df_event = (event_channel_df[event_channel_df[event_name].notna()]
                        .assign(type=event_name, session=f['value']))
        else:
            # Placeholder row with a missing timestamp: keeps the (session, type)
            # pair in the table so it is later counted as 0 events.
            # np.nan (lower case) is used because the np.NAN alias no longer
            # exists in NumPy 2.
            df_event = pd.DataFrame(data=[[np.nan, event_name, f['value']]], columns=event_columns)
        event_frames.append(df_event[event_columns])

    stats['sessions'].append(stat_session)
    # drop the reference so the session can be freed before the next file is loaded
    oscar_session_data = None

if event_frames:
    df_event_all_sessions = pd.concat(event_frames)
else:
    # No file found: keep an empty table with the expected columns.
    df_event_all_sessions = pd.DataFrame(columns=event_columns)

# All event per session
display(df_event_all_sessions)

In [None]:
# Event statistics. count() only counts rows whose time_absolute is not null.

# number of event per type
timestamps_by_type = df_event_all_sessions.groupby(['type'])['time_absolute']
df_count_per_type = timestamps_by_type.count().reset_index(name='count').sort_values('type')

# number of event per type per session
timestamps_by_session_and_type = df_event_all_sessions.groupby(['session', 'type'])['time_absolute']
df_count_per_type_per_session = timestamps_by_session_and_type.count().reset_index(name='count').sort_values('type')

print(df_count_per_type)
print(df_count_per_type_per_session)

In [None]:
# Show the statistics dictionary as collected so far (rendered as the cell output).
stats

In [None]:
def ms_to_hour_min_sec_ms(ms):
    """Split a duration in milliseconds into hours, minutes, seconds and milliseconds.

    The split is done with successive ``divmod`` calls, so the result types
    follow the input (int in, ints out; float in, floats out).

    Returns a tuple ``(hours, minutes, seconds, milliseconds)``.
    """
    total_seconds, millis = divmod(ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return hours, minutes, seconds, millis

In [None]:
# Aggregate statistics over all sessions. `stat` (the statistics module) is
# imported in the import cell at the top of the notebook.

# FlowRate: session lengths in milliseconds
length_array = np.array([s['lenght FlowRate (ms)'] for s in stats['sessions']])
avg_len = length_array.mean()
med_len = np.median(length_array)
# A sample standard deviation needs at least two values; statistics.stdev raises
# StatisticsError otherwise, so report NaN for fewer than two sessions.
stddev_len = stat.stdev(length_array.tolist()) if length_array.size > 1 else float('nan')
stats['FlowRate average length (ms)'] = avg_len
stats['FlowRate stddev (ms)'] = stddev_len
stats['FlowRate median (ms)'] = med_len
stats['FlowRate average length (hmsms))'] = ms_to_hour_min_sec_ms(avg_len)
stats['FlowRate stddev (hmsms)'] = ms_to_hour_min_sec_ms(stddev_len)
stats['FlowRate median (hmsms)'] = ms_to_hour_min_sec_ms(med_len)

# Events: mean / sample standard deviation / median of the per-session counts,
# computed for all event types in one pass. reindex() keeps the order of
# event_names and yields NaN for a type that has no row at all.
events_summary = (
    df_count_per_type_per_session
    .groupby('type')['count']
    .agg(['mean', 'std', 'median'])
    .reindex(event_names)
)
stats['events'] = {'count': pd.Series(df_count_per_type['count'].values,index=df_count_per_type['type']).to_dict()}
stats['events']['average_sess'] = events_summary['mean'].to_dict()
stats['events']['stddev_sess'] = events_summary['std'].to_dict()
stats['events']['median'] = events_summary['median'].to_dict()

In [None]:
# Show the statistics dictionary again, now with the aggregate entries added above.
stats

# Length

In [None]:
sns.set(rc={'figure.figsize':(5,5)})
# Session lengths, converted from milliseconds to hours.
length_hours = length_array/1000.0/60/60
length_boxplot = sns.boxplot(data=length_hours)
length_boxplot.set(xlabel='Sessions', ylabel='Length (hour)')

# Events

In [None]:
# Events per session and type: bar = average, error bar = standard deviation.
sns.set(rc={'figure.figsize':(5,5)})
sd_axis_labels = {'xlabel': 'type of event', 'ylabel': 'Nb events per session (avg+stdev)'}
sd_barplot = sns.barplot(data=df_count_per_type_per_session, x='type', y='count', errorbar='sd')
sd_barplot.set(**sd_axis_labels)

In [None]:
# Events per session and type: bar = average, error bar = standard error.
sns.set(rc={'figure.figsize':(5,5)})
se_axis_labels = {'xlabel': 'type of event', 'ylabel': 'Nb events per session (avg+stderr)'}
se_barplot = sns.barplot(data=df_count_per_type_per_session, x='type', y='count', errorbar='se')
se_barplot.set(**se_axis_labels)

In [None]:
# Box plot of the per-session event counts, one box per event type.
sns.boxplot(data=df_count_per_type_per_session, x='type', y='count')

In [None]:
# Total number of events of each type over all sessions.
sns.set(rc={'figure.figsize':(5,5)})
total_axis_labels = {'xlabel': 'type of event', 'ylabel': 'Nb events per type'}
total_barplot = sns.barplot(data=df_count_per_type, x='type', y='count')
total_barplot.set(**total_axis_labels)