In [5]:
!pip install pyEDFlib
!pip install pingouin



In [6]:
import os
import pyedflib
import pandas as pd
import datetime
import numpy as np
import pyedflib
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
import plotly.figure_factory as ff
import scipy
from pingouin import intraclass_corr
import pingouin as pg
from sklearn.metrics import cohen_kappa_score
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import balanced_accuracy_score
import seaborn as sn

In [7]:
from constants import *
print(PATH)

../data/


In [8]:
def testUtil():
    return "PSG Utils Loaded"

In [26]:
def get_psg_tsv_file(participant_id: str, file_type:str, _path:str):
    """Valid entries for file_type is "scans" or "events"
    
    example: get_psg_tsv_file("002", "scans")"""
    
    try:
        file = pd.read_csv(os.path.join(_path, "SMS_" + participant_id, "RemLogic", "sub_%s.tsv" % (file_type)), sep="\t") 
        return file
    
    except Exception as e:
        print("No file %s" % str(e))

In [27]:
def get_sleepstages_psg_remlogic(subjectID, _path):
    """ Function to get Sleepstages from PSG.
    Returns a Tuple: 
        [0]:boolean --> True, if request was sucessefull
        [1]: DataFrame with LOCAL timestamps as Index and Sleepstages as Columne entries
    The stages are coded the following way:
    0 = Awake = SLEEP-S0
    1 = REM   = SLEEP-REM
    2 = Light = SLEEP-S1 and  SLEEP-S2 
    3 = Deep  = SLEEP-S3"""
    acq_time = get_psg_tsv_file(subjectID, "scans",_path)
    if acq_time is None:
        return(False,)
    acq_time = pd.to_datetime(acq_time.columns[1])

    event_data = get_psg_tsv_file(subjectID, "events",_path)
    sleep_data= event_data[(event_data['trial_type']=="SLEEP-S0")|
                      (event_data['trial_type']=="SLEEP-S1")|
                      (event_data['trial_type']=="SLEEP-S2")|
                      (event_data['trial_type']=="SLEEP-S3")|
                      (event_data['trial_type']=="SLEEP-REM")].reset_index().drop("index",axis=1)

    sleep_data["time"]= pd.Series(acq_time).repeat(len(sleep_data)).values + pd.to_timedelta(sleep_data.onset, unit='s')

    sleep_data['trial_type'].replace({"SLEEP-S0":"0_Stage_Wake",
             "SLEEP-S1":"2_Stage_Light",
             "SLEEP-S2":"2_Stage_Light",
             "SLEEP-S3":"3_Stage_Deep",
             "SLEEP-REM":"1_Stage_REM"}, inplace=True)

    sleep_data["sleep_stage_num_psg"]= sleep_data["trial_type"]
    sleep_data["sleep_stage_num_psg"].replace({"3_Stage_Deep":3, "2_Stage_Light":2, "1_Stage_REM":1, "0_Stage_Wake":0}, inplace=True)
    sleep_data['timestamp_local'] =pd.to_datetime(sleep_data['time']).dt.tz_localize('Europe/Paris')
    sleep_data.set_index('timestamp_local', inplace= True)
    return (True,sleep_data[['sleep_stage_num_psg']])

get_sleepstages("003", RAW_PATH)


(True,
                            sleep_stage_num_psg
 timestamp_local                               
 2021-08-09 22:25:00+02:00                    0
 2021-08-09 22:25:30+02:00                    0
 2021-08-09 22:26:00+02:00                    0
 2021-08-09 22:26:30+02:00                    0
 2021-08-09 22:27:00+02:00                    0
 ...                                        ...
 2021-08-10 08:52:30+02:00                    0
 2021-08-10 08:53:00+02:00                    0
 2021-08-10 08:53:30+02:00                    0
 2021-08-10 08:54:00+02:00                    0
 2021-08-10 08:54:30+02:00                    0
 
 [1260 rows x 1 columns])

In [13]:
def get_sleep_stages(subjectID, _path):

    acq_time = get_psg_tsv_file(subjectID, "scans",_path)
    
    if acq_time is None:
        return(False,)
    
    acq_time = pd.to_datetime(acq_time.columns[1])

    event_data = get_psg_tsv_file(subjectID, "events",_path)
    event_data.index = event_data.onset.apply(lambda x: acq_time + datetime.timedelta(seconds=x))
    
    event_data.index =pd.to_datetime(event_data.index).tz_localize('Europe/Zurich')
    
    sleep_data = event_data[event_data.trial_type.str.contains("SLEEP")].copy()

    sleep_class_map = {
        "SLEEP-S0": 3,
        "SLEEP-S1": 1, 
        "SLEEP-S2": 1,
        "SLEEP-S3": 0,
        "SLEEP-REM": 2
    }

    sleep_data["sleep_stage_num_psg"] = sleep_data.trial_type.map(sleep_class_map)
    sleep_data = sleep_data[sleep_data.trial_type.isin(sleep_class_map.keys())]
    
    return (True,sleep_data[['sleep_stage_num_psg']])

In [14]:
def get_EMFIT_sleep_stages_file(subjectID, emfitID, _path, shift = "0s"):
    """Returns a Tuple: 
        [0]:boolean --> True, if request was sucessefull
        [1]: DataFrame with LOCAL timestamps as Index and the rest as Columne entries        
    """
    path_to_subject_root= f'{_path}/SMS_{subjectID}'
    path_to_subject_psg= f'{path_to_subject_root}/psg/EMFIT_{emfitID}'

    if os.path.exists(path_to_subject_psg):
        
        file_list= os.listdir(path_to_subject_psg)
        file = f'SMS_{subjectID}-psg-EMFIT_{emfitID}-processed_sleepclasses.csv'

        if file in file_list:
        
            data_EMFIT =  pd.read_csv(f'{path_to_subject_psg}/{file}')
            data_EMFIT = data_EMFIT.rename(columns={'timestamp': 'timestamp_local'})
            data_EMFIT.index = pd.to_datetime(data_EMFIT.timestamp_local, unit="s")
            data_EMFIT = data_EMFIT.tz_localize("UTC")
            data_EMFIT = data_EMFIT.tz_convert("Europe/Zurich")
            
            EMFIT_SLEEPCLASS_MAP = {
                1: 0,
                2: 1,
                3: 2,
                4: 3
            }

            data_EMFIT["sleep_stage_num_emfit"] = data_EMFIT.sleep_class.map(EMFIT_SLEEPCLASS_MAP)
            data_EMFIT_resampled = data_EMFIT.resample("30s").median().ffill()       #should be an int 

            return (True,data_EMFIT_resampled)
        
        else:
            print(f'ERROR: no sleep_stages file in folder {path_to_subject_psg}')
            return (False,)
            
    else:
        print(f'No EMFIT_{emfitID} Data for Participant: {subjectID}')
        
        return (False,)

In [18]:
def get_sleepclasses_mat(participant_id: str, emfit_id: str, _path:str):
    
    try:
        file = pd.read_csv(os.path.join(_path, "SMS_" + participant_id, "EMFIT_" + emfit_id, "SMS_%s-psg-EMFIT_%s-processed_sleepclasses.csv" % (participant_id, emfit_id)), index_col=0)
    
        return file    
    
    except Exception as e:
        print("No file %s" % str(e))
    


get_sleepclasses_mat("003", "001505", RAW_PATH)

Unnamed: 0,at,timestamp,sleep_class
1,2021-08-09 22:44,1628541893,2
2,2021-08-09 23:07,1628543273,1
3,2021-08-09 23:26,1628544383,2
4,2021-08-09 23:48,1628545733,3
5,2021-08-10 00:12,1628547173,2
6,2021-08-10 00:36,1628548613,1
7,2021-08-10 00:55,1628549753,2
8,2021-08-10 01:15,1628550923,1
9,2021-08-10 01:27,1628551673,2
10,2021-08-10 01:42,1628552543,3


In [21]:
def get_empatica_file(participant_id: str, sensor: str, _path:str = RAW_PATH):
    
    try:
        file = pd.read_csv(os.path.join(_path, "SMS_" + participant_id, "Empatica", "SMS_%s-psg-Empatica-%s.csv" % (participant_id, sensor)))
        return file
    
    except Exception as e:
        print("No file %s" % str(e))

In [22]:
def get_somnofy_data(subjectID, _path, shift = "0s"):
    """Returns a Tuple: 
        [0]:boolean --> True, if request was sucessefull
        [1]: DataFrame with LOCAL timestamps as Index and the rest as Columne entries        
    """
    path_to_subject_root= f'{_path}/SMS_{subjectID}'
    path_to_subject_psg= f'{path_to_subject_root}/Somnofy'

    if os.path.exists(path_to_subject_psg):
        file_list= os.listdir(path_to_subject_psg)
        if len(file_list) != 1:
            print(f'ERROR: more than 1 or no file in folder {path_to_subject_psg}')
            return (False,)
        else:
            data_somnofy =  pd.read_csv(f'{path_to_subject_psg}/{file_list[0]}')

            data_somnofy["sleep_stage"].replace({1:"3_Stage_Deep", 2:"2_Stage_Light", 3:"1_Stage_REM", 4: "0_Stage_Wake", 5: np.NaN}, inplace=True)
            data_somnofy["sleep_stage_num_somnofy"]= data_somnofy["sleep_stage"]
            data_somnofy["sleep_stage_num_somnofy"].replace({"3_Stage_Deep":3, "2_Stage_Light":2, "1_Stage_REM":1, "0_Stage_Wake":0}, inplace=True)

            data_somnofy['timestamp_local'] =pd.to_datetime(data_somnofy['timestamp_local']).dt.tz_localize('Europe/Paris')

            data_somnofy['timestamp_local']=data_somnofy['timestamp_local']+ pd.Timedelta('30s')

            data_somnofy.set_index('timestamp_local', inplace = True)
            data_somnofy_resampled=data_somnofy.resample('30s').median().drop("Unnamed: 0",axis= 1)
            data_somnofy_resampled["sleep_stage_num_somnofy"]=data_somnofy_resampled["sleep_stage_num_somnofy"].round(decimals=0)
        return (True,data_somnofy_resampled)
    else:
        print(f'No Somnofy Data for Participant: {subjectID}')
        return (False,)

In [23]:
def get_sleepstages_radar(subjectID, _path):
    """    Function to get Sleepstages from Somnofy Report. the stages are coded the following way 
    (the first column is the value now)
    0 = Awake = 4
    1 = REM   = 3
    2 = Light = 2
    3 = Deep  = 1
    nan= ...  = 5 <-- No Sleep Stage classified, due to (movement) artefacts"""
    _d = get_somnofy_data(subjectID, _path)
    if _d[0]:
        return (True,_d[1][['sleep_stage_num_somnofy']])
    else:
        print("Sleep stages could no be extracted")
        return (False,)
              
get_sleepstages_radar("003",RAW_PATH)

(True,
                            sleep_stage_num_somnofy
 timestamp_local                                   
 2021-08-09 22:25:30+02:00                      0.0
 2021-08-09 22:26:00+02:00                      0.0
 2021-08-09 22:26:30+02:00                      0.0
 2021-08-09 22:27:00+02:00                      0.0
 2021-08-09 22:27:30+02:00                      0.0
 ...                                            ...
 2021-08-10 08:53:00+02:00                      0.0
 2021-08-10 08:53:30+02:00                      0.0
 2021-08-10 08:54:00+02:00                      0.0
 2021-08-10 08:54:30+02:00                      0.0
 2021-08-10 08:55:00+02:00                      0.0
 
 [1260 rows x 1 columns])

In [39]:
def get_sleepstages_psg_somno(subjectID, _path):
    """    Function to get Sleepstages from Somnomedics Report. the stages are coded the following way 
    (the first column is the value now)
    0 = Awake = 4
    1 = REM   = 3
    2 = Light = 2
    3 = Deep  = 1
    nan= ...  = 5 <-- No Sleep Stage classified, due to (movement) artefacts"""
    path_to_file = os.path.join(_path, "SMS_" + subjectID, "somnomedics", "Sleep profile.txt")
    if os.path.exists(path_to_file):
        with open(path_to_file) as f:
            lines = f.readlines()
        if len(lines) <= 7:  # intro lines
            print(f'No Somnomedics Data for Participant: {subjectID}')
            return (False,)
        data = lines[7:]
        data = [a[:-1].split(";") for a in data]
        print(data[0])
        data_somnomedics = pd.DataFrame(data, columns = ['timestamp_local', 'sleep_stage'])
        data_somnomedics["sleep_stage"].replace({" N3":"3_Stage_Deep", " N2":"2_Stage_Light", " N1":"2_Stage_Light", 
                                                 " REM":"1_Stage_REM", " Wake": "0_Stage_Wake", " Artefact": np.NaN, " A": np.NaN}, inplace=True)
        data_somnomedics["sleep_stage_num"]= data_somnomedics["sleep_stage"]
        data_somnomedics["sleep_stage_num"].replace({"3_Stage_Deep":3, "2_Stage_Light":2, "1_Stage_REM":1, "0_Stage_Wake":0}, inplace=True)

        data_somnomedics['timestamp_local'] =pd.to_datetime(data_somnomedics['timestamp_local']).dt.tz_localize('Europe/Paris')

        data_somnomedics['timestamp_local']=data_somnomedics['timestamp_local']+ pd.Timedelta('30s')

        data_somnomedics.set_index('timestamp_local', inplace = True)
        return (True,data_somnomedics)
    else:
        print(f'No Somnofy Data for Participant: {subjectID}')
        return (False,)

get_sleepstages_psg_somno("043", RAW_PATH)


['24.01.2022 22:12:30,000', ' A']


(True,
                              sleep_stage  sleep_stage_num
 timestamp_local                                          
 2022-01-24 22:13:00+01:00            NaN              NaN
 2022-01-24 22:13:30+01:00   0_Stage_Wake              0.0
 2022-01-24 22:14:00+01:00   0_Stage_Wake              0.0
 2022-01-24 22:14:30+01:00   0_Stage_Wake              0.0
 2022-01-24 22:15:00+01:00   0_Stage_Wake              0.0
 ...                                  ...              ...
 2022-01-25 08:11:00+01:00  2_Stage_Light              2.0
 2022-01-25 08:11:30+01:00  2_Stage_Light              2.0
 2022-01-25 08:12:00+01:00  2_Stage_Light              2.0
 2022-01-25 08:12:30+01:00  2_Stage_Light              2.0
 2022-01-25 08:13:00+01:00   0_Stage_Wake              0.0
 
 [1201 rows x 2 columns])

In [24]:
p = Util_psg.get_sleep_stages(test_participant,path)[1]
s = Util_emf.get_sleepstages_emfit(test_participant, emfit_id[0], path)[1]

NameError: name 'Util_psg' is not defined

In [None]:
fig = make_subplots(rows=1, cols=1,shared_xaxes=True)

fig.add_trace(go.Scatter(x=p.index,
                         y=p['sleep_stage_num_psg'],name="PSG",
                         mode="markers+lines"), col=1, row=1)

fig.add_trace(go.Scatter(x=s.index,
                         y=s['sleep_stage_num_emfit'],name="Emfit",
                         mode="markers+lines"), col=1, row=1)