In [None]:
import os.path
from os import listdir, path
from os.path import isfile, join
import datetime
from tqdm.notebook import tqdm

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
import seaborn as sns
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot


from pyapnea.oscar.oscar_loader import load_session
from pyapnea.oscar.oscar_getter import event_data_to_dataframe, get_channel_from_code
from pyapnea.oscar.oscar_constants import CHANNELS, ChannelID

In [None]:
# Notebook-wide setup: switch plotly to its offline notebook mode.
init_notebook_mode(connected=True)
# Uncomment to lift pandas' row limit when displaying DataFrames:
#pd.set_option('display.max_rows', None)

In [None]:
#data_path = '/home/julien/Documents/OSCAR_Data-master/Profiles/Julien/ResMed_23221085377/Events'
# Folder holding the OSCAR event files. The historical location stays the default;
# set the OSCAR_EVENTS_PATH environment variable to point the notebook elsewhere.
data_path = os.environ.get('OSCAR_EVENTS_PATH',
                           '/home/julien/Documents/oscar_data/Profiles/Julien/ResMed_23221085377/Events')
# listdir() returns names in arbitrary order; sorting makes list_files[i] refer to
# the same file on every run and on every machine.
list_files = [{'label': f, 'value': f} for f in sorted(listdir(data_path)) if isfile(join(data_path, f))]

In [None]:
def event_data_to_dataframe_all_channel(oscar_session_data, local_tz='America/Montreal'):
    """Merge the FlowRate, Obstructive and Pressure channels of a session into one DataFrame.

    Only the first event block (``channel.events[0]``) of each supported channel is
    read. Each channel contributes a column named after its entry in ``CHANNELS``
    holding the raw samples multiplied by the event gain. Channels are joined with
    an outer merge on the absolute timestamp, so a column is NaN wherever its
    channel has no sample at that instant. The ``time`` column (offset in ms from
    the block start) comes from the first merged channel only.

    Parameters
    ----------
    oscar_session_data
        Loaded session; ``oscar_session_data.data.channels`` is iterated. The
        session object is left unmodified.
    local_tz : str, optional
        Time zone used for the ``local_time`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, one column per channel found, ``time_absolute``
        (UTC, tz-aware) and ``local_time``; rows are ordered by ``time_absolute``.
        When the session holds none of the supported channels, an empty frame
        with the columns ``time``, ``time_absolute`` and ``local_time`` is returned.
    """
    possible_channels = [ChannelID.CPAP_FlowRate.value, ChannelID.CPAP_Obstructive.value, ChannelID.CPAP_Pressure.value]
    global_df = None
    for channel in oscar_session_data.data.channels:
        # Skip unsupported channels and channels without any event block.
        if channel.code not in possible_channels or len(channel.events) == 0:
            continue
        event = channel.events[0]
        y_col_name = [c[5] for c in CHANNELS if c[1].value == channel.code][0]
        gain = event.gain

        if event.t8 == 0:
            # No stored time offsets: rebuild them from the sampling step.
            # Kept in a local variable so the caller's session is not modified.
            step = int(event.rate)
            times = range(0, event.evcount * step, step)
        else:
            times = event.time

        df = pd.DataFrame(data={'time': times, y_col_name: event.data})
        df[y_col_name] = df[y_col_name] * gain

        if event.second_field:
            # not tested because no available file has a second field
            df[y_col_name + '2'] = event.data2
            df[y_col_name + '2'] = df[y_col_name + '2'] * gain

        # ts1 is added to the ms offsets, giving epoch milliseconds.
        df['time_absolute'] = pd.to_datetime(df['time'] + event.ts1, unit='ms')

        if global_df is None or global_df.empty:
            global_df = df
        else:
            # Columns present on both sides (e.g. 'time') keep the left-hand version.
            global_df = pd.merge(global_df, df, on='time_absolute', how='outer',
                                 suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)')

    if global_df is None:
        # None of the supported channels is present in this session.
        return pd.DataFrame(columns=['time', 'time_absolute', 'local_time'])

    global_df['time_absolute'] = global_df['time_absolute'].dt.tz_localize('UTC')
    global_df['local_time'] = global_df['time_absolute'].dt.tz_convert(local_tz)
    # Chronological order (the previous sort_index on a RangeIndex did nothing).
    return global_df.sort_values('time_absolute', kind='mergesort', ignore_index=True)

In [None]:
# Load a single session file and convert its channels into one DataFrame.
# The entry at index 0 of list_files is loaded here.
# Author's note: "number 2 has obstructive events" (which file that is depends on the order of list_files).
oscar_session_data = load_session(os.path.join(data_path, list_files[0]['value']))
df = event_data_to_dataframe_all_channel(oscar_session_data)

# TODO: handle the Leak channel, which has two values per timestamp
#df_Leak = event_data_to_dataframe(oscar_session_data, ChannelID.CPAP_Leak.value)
#np.where(df.index.duplicated(keep=False) == True)

# Alternative inspections, kept for debugging:
#print(df.describe())
#print(df)
display(df)



In [None]:
# Reshape the merged frame into seaborn's long input format:
# one row per (local_time, channel) pair, channel name in 'cols', sample in 'vals'.
dfc = df[['FlowRate', 'Obstructive', 'Pressure', 'local_time']]
dfm = (
    dfc
    .melt('local_time', var_name='cols', value_name='vals')
    .sort_values(by=['local_time'], ignore_index=True)
)
# Rows of the Obstructive channel that actually carry a value; used to annotate plots.
has_obstructive_value = dfm['vals'].notna() & (dfm['cols'] == 'Obstructive')
dfm_annotation = dfm[has_obstructive_value]
display(dfm_annotation)

In [None]:
# sns.set(rc={'figure.figsize':(25,15)})
# fig, ax = plt.subplots()
# sns.lineplot(data=dfm,x='local_time', y='vals', hue='cols')
# for a in dfm_annotation['local_time']:
#     plt.axvline(x=a, color='g', linewidth=3)

In [None]:
# sns.set(rc={'figure.figsize':(25,15)})
# fig, ax = plt.subplots()
# ax.set_ylim(-40, 50)
# ax.set_xlim(datetime.datetime(2022, 6, 17, 21, 16,5), datetime.datetime(2022, 6, 17, 21, 16,30))
# sns.lineplot(data=dfm,x='local_time', y='vals', hue='cols', ax = ax)
# for a in dfm_annotation['local_time']:
#     plt.axvline(x=a, color='g', linewidth=3)

# Statistics
- Number of sessions

In [None]:
# Build one summary record per session file, based on the first event block
# of the FlowRate channel.
stats = {'nb_sessions': len(list_files), 'sessions': []}

# loading all files
with tqdm(total=len(list_files)) as pbar:
    for f in list_files:
        oscar_session_data = load_session(os.path.join(data_path, f['value']))
        # FlowRate channel: its first event block provides ts1 / ts2
        flowrate_chanel = get_channel_from_code(oscar_session_data, ChannelID.CPAP_FlowRate.value)
        flowrate_event = flowrate_chanel.events[0]

        stat_session = {}
        stat_session['filename'] = f['label']
        stat_session['nb channel'] = len(oscar_session_data.data.channels)
        stat_session['ts1'] = flowrate_event.ts1
        stat_session['ts2'] = flowrate_event.ts2
        # ts1 is divided by 1000 before being handed to fromtimestamp (seconds expected)
        stat_session['starting time'] = datetime.datetime.fromtimestamp(flowrate_event.ts1/1000.0)
        stat_session['lenght FlowRate (ms)'] = flowrate_event.ts2 - flowrate_event.ts1
        session_channel_codes = [c.code for c in oscar_session_data.data.channels]
        stat_session['FlowRate present'] = (ChannelID.CPAP_FlowRate.value in session_channel_codes)

        stats['sessions'].append(stat_session)
        pbar.update(1)
        # drop the reference to the loaded session before reading the next file
        oscar_session_data = None

In [None]:
def ms_to_hour_min_sec_ms(ms):
    """Split a duration in milliseconds into (hours, minutes, seconds, milliseconds).

    The hours component is not capped at 24. The components are produced by
    successive ``divmod`` calls, so they have the numeric type ``divmod``
    yields for the given input.
    """
    whole_seconds, millis = divmod(ms, 1000)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return hours, minutes, seconds, millis

In [None]:
import statistics as stat

# Aggregate the FlowRate recording length (ts2 - ts1, in ms) over all sessions.
length_array = np.array([s['lenght FlowRate (ms)'] for s in stats['sessions']])
avg_len = length_array.mean()
med_len = np.median(length_array)
# pstdev is the population standard deviation (the keys below call it "std error").
pstdev_len = stat.pstdev(length_array.tolist())

# Raw values in milliseconds.
stats['FlowRate average length (ms)'] = avg_len
stats['FlowRate std error length (ms)'] = pstdev_len
stats['FlowRate median (ms)'] = med_len

# Same values as (hour, min, sec, ms) tuples.
# NOTE(review): the first key below has a stray ')' — kept as-is in case it is read elsewhere.
stats['FlowRate average length (hmsms))'] = ms_to_hour_min_sec_ms(avg_len)
stats['FlowRate std error length (hmsms)'] = ms_to_hour_min_sec_ms(pstdev_len)
# Stored under its own key: it used to overwrite 'FlowRate median (ms)' with the tuple.
stats['FlowRate median (hmsms)'] = ms_to_hour_min_sec_ms(med_len)

In [None]:
# Show the collected statistics dictionary as the cell output.
stats

In [None]:
# Box plot of the FlowRate recording lengths, converted from milliseconds to hours.
length_hours = length_array/1000.0/60/60
sns.boxplot(data=length_hours).set(xlabel='Sessions', ylabel='Length (hour)')