In [1]:
"""
This cell walks through each subfolder in SHH2_Data and processes each csv/xml pair.
RR intervals are calculated for each epoch and matched with the corresponding sleep stage.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

# Accumulators for the macro df. Rows are collected in plain lists and the
# DataFrame is built once at the end: DataFrame.append was removed in
# pandas 2.0, and growing a frame inside a loop copies it on every iteration.
all_rr_intervals, all_sleep_stages = [], []

#setting my directory path
mypath = '/Users/jonathan.williams/Desktop/Desktop-s-will511/VAIL_AI/VAIL_AI_Code/Driver_Drowsiness/SHH2_Data'
import os


def _load_sleep_stages(xml_path):
    """Read a hypnogram xml and return its sleep stages as a (1, n) object array.

    The text of every child of the root's direct ``SleepStages`` element is
    collected in document order, so ``result[0, k]`` is the text of the k-th
    stage entry. ``ET.parse`` opens and closes the file itself, so no file
    handle is left open.

    Raises:
        KeyError: if the root has no ``SleepStages`` child.
    """
    root = ET.parse(xml_path).getroot()
    stages_node = root.find('SleepStages')
    if stages_node is None:
        raise KeyError("no 'SleepStages' element in " + str(xml_path))
    stage_texts = [stage.text for stage in stages_node]
    return np.array(stage_texts, dtype=object).reshape(1, -1)


def _epoch_rr_intervals(ecg_df, sleep_stages_arr):
    """Compute per-epoch RR intervals and look up each epoch's sleep stage.

    Rows of ``ecg_df`` are split into runs of consecutive rows sharing the
    same ``epoch`` value. For each run, the RR intervals are the successive
    differences of its ``seconds`` values, and the sleep stage is
    ``int(sleep_stages_arr[0, int(epoch)])``.

    NOTE(review): the final run of the recording is not emitted. This keeps
    the behaviour of the original row-by-row loop, which only stored an epoch
    once the following epoch had started -- confirm whether the last epoch
    should be included.

    Returns:
        (rr_interval_list, sleep_stage_list): two lists of equal length; the
        first holds one list of RR intervals per epoch, the second the
        matching integer sleep stage.
    """
    seconds = ecg_df['seconds'].to_numpy()
    epochs = ecg_df['epoch'].to_numpy()
    if len(epochs) == 0:
        # nothing recorded -> nothing to report
        return [], []

    # Row positions where the epoch value differs from the previous row.
    run_starts = np.flatnonzero(epochs[1:] != epochs[:-1]) + 1
    seconds_per_run = np.split(seconds, run_starts)
    epoch_per_run = epochs[np.concatenate(([0], run_starts))]

    # [:-1] drops the still-open final run (see the note in the docstring).
    rr_interval_list = [np.diff(run).tolist() for run in seconds_per_run[:-1]]
    sleep_stage_list = [int(sleep_stages_arr[0, int(epoch)])
                        for epoch in epoch_per_run[:-1]]
    return rr_interval_list, sleep_stage_list


for entry in os.listdir(mypath):
    sub_dir_path = os.path.join(mypath, entry)
    if not os.path.isdir(sub_dir_path):
        continue

    # Forget the previous folder's ECG data so that an xml can never be
    # silently paired with a csv that belongs to another recording.
    ecg_df = None

    for dirpath, dirnames, files in os.walk(sub_dir_path):
        # Reverse-alphabetical order: this reads the csv first only when the
        # csv's name sorts after the xml's name.
        # NOTE(review): confirm this holds for every recording's file names.
        for file in reversed(sorted(files)):
            file_name = os.path.join(dirpath, file)

            if file.endswith('.csv'):
                #working with a csv
                #reading csv into df, dropping all cols except seconds and epoch
                ecg_df = pd.read_csv(file_name)[['seconds', 'epoch']]

            elif file.endswith('.xml'):
                #working with hypnogram xml
                if ecg_df is None:
                    raise ValueError(
                        'hypnogram ' + file_name + ' was reached before any csv '
                        'in ' + sub_dir_path + '; cannot pair it with ECG data'
                    )

                #note: the nth epoch from the ecg_df corresponds to the [0,nth] item in this np array
                sleep_stages_arr = _load_sleep_stages(file_name)

                #performing rr interval analysis and adding it to the macro lists
                rr_interval_list, sleep_stage_list = _epoch_rr_intervals(ecg_df, sleep_stages_arr)
                all_rr_intervals.extend(rr_interval_list)
                all_sleep_stages.extend(sleep_stage_list)

                # running total of rows collected so far
                print(len(all_sleep_stages))

#making the macro df
rri_sleep_df = pd.DataFrame({'RR Interval': all_rr_intervals,
                             'Sleep Stage': all_sleep_stages})


1347
2465
3566
4743
5850
7008
8342
9660
10976
12419
13663
15021
16057
17206
18680
19650
20640
21958
23148
24378
25944
26965
27911
28713
29945
31027
32434
33745
34890
35671
36815
37880
39157
40360
41719
42857
43812
44903
45942
47385
48478
49610
50919
51973
53296
54668
55792
56797
58267
59560
60697
61910
63337
64376
65570
67013
68112
69311
70551
71719
72917
74294
75621
76640
77501
78794
79905
81271
82409
83656
84964
85944
87068
88163
89300
90447
91946
93141
94407
95656
96945
97899
99168
100426
102068
103473
104655
105624
106513


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Preview of the first rows; not rendered here because it is not the
# last expression of the cell.
rri_sleep_df.head()

# Write the RR-interval / sleep-stage table to disk as a CSV file.
rris_database_path = '/Users/jonathan.williams/Desktop/Desktop-s-will511/VAIL_AI/VAIL_AI_Code/Driver_Drowsiness/RRIS_Database.csv'
rri_sleep_df.to_csv(path_or_buf=rris_database_path)

In [None]:
from ast import literal_eval
# Previews of both ends of the table; neither is rendered because
# they are not the last expression of the cell.
rri_sleep_df.head()
rri_sleep_df.tail()


# Print the parsed RR intervals of every row, in row order.
# process_intervals is defined in a later cell of this notebook, so that
# cell has to be run before this one.
for rr_string_interval in rri_sleep_df['RR Interval']:
  print(process_intervals(rr_string_interval))

In [None]:
def process_intervals(rr_string_interval):
  """Parse a textual list of RR intervals into a list of floats.

  Accepts the whitespace-separated form (``"[0.875 0.89]"``) as well as the
  comma-separated form produced when a Python list is written to CSV
  (``"[0.875, 0.89]"``). Brackets and commas are treated as separators, so
  a single-element string (``"[0.875]"``), an empty list (``"[]"``) and an
  empty string are all handled.

  Args:
    rr_string_interval: the string to parse. A non-string iterable of
      numbers (e.g. an already-parsed list) is also accepted and is simply
      converted element-wise to floats.

  Returns:
    A list of floats, in the order they appear in the input.

  Raises:
    ValueError: if a token cannot be converted to a float.
  """
  if not isinstance(rr_string_interval, str):
    # already a sequence of numbers; nothing to parse
    return [float(value) for value in rr_string_interval]

  # Turn every bracket and comma into whitespace so that split() yields only
  # the numeric tokens, wherever the punctuation was attached.
  cleaned = rr_string_interval
  for separator in '[],':
    cleaned = cleaned.replace(separator, ' ')
  return [float(token) for token in cleaned.split()]

In [None]:
# Disabled export step: the code below is wrapped in a triple-quoted string
# literal, so it is not executed. It would write rri_sleep_df to a second CSV
# path without the index column.
"""
#writing the final dataframe to a csv:
csv_save_file_path = '/Users/jonathan.williams/Desktop/Desktop-s-will511/VAIL_AI/VAIL_AI_Code/Driver_Drowsiness/RRinterval_SleepStages_Dataset.csv'
rri_sleep_df.to_csv(csv_save_file_path,encoding='utf-8',index=False)
"""

In [8]:
# Sanity check: show the RR intervals and the sleep stage stored in the
# first row of the table, one value per line.
test = rri_sleep_df.iloc[0]
for column_name in ('RR Interval', 'Sleep Stage'):
  print(test[column_name])

[0.875, 0.890625, 0.8828125, 0.875, 0.8359375, 0.84765625, 0.84765625, 0.83203125, 0.8671875, 0.85546875, 0.875, 0.890625, 0.87109375, 0.90234375, 0.87890625, 0.8828125, 0.86328125, 0.87890625, 0.8828125, 0.88671875, 0.8828125, 0.88671875, 0.8671875, 0.84765625, 0.83203125, 0.828125, 0.85546875, 0.828125, 0.84765625, 0.8515625, 0.859375, 0.87890625, 0.86328125, 0.87890625]
0
