In [None]:
import numpy as np
import pandas as pd
import datetime
import glob
import sys
import json
import os

# Custom package
import devicely

In [None]:
def truncate_empatica(data_empatica, bp_df, verbose=False):
    """Collect the Empatica samples that fall inside each blood-pressure window.

    For every row of ``bp_df`` the rows of ``data_empatica`` whose index lies
    between ``window_start`` and ``window_end`` are selected and labelled with
    that measurement's ``utc``, systolic/diastolic values and ``subject``.
    Samples outside every window (e.g. the exercise period, where no blood
    pressure was measured) are left out.

    Parameters
    ----------
    data_empatica : pandas.DataFrame
        Empatica samples; the index must be sorted, as required by
        ``DataFrame.truncate``.
    bp_df : pandas.DataFrame
        One row per measurement with the columns ``window_start``,
        ``window_end``, ``utc``, ``SYS(mmHg)``, ``DIA(mmHg)`` and ``subject``.
    verbose : bool, optional
        When true, print the head, summary statistics and shape of the result.

    Returns
    -------
    pandas.DataFrame
        All windows stacked in ``bp_df`` order with the extra columns ``utc``,
        ``bp_sys``, ``bp_dia`` and ``subject``; an empty frame when ``bp_df``
        has no rows.
    """
    windows = []
    for _, row in bp_df.iterrows():
        # Copy so the label columns are written to an independent frame.
        truncated_df = data_empatica.truncate(before=row['window_start'], after=row['window_end']).copy()
        truncated_df['utc'] = row['utc']
        truncated_df['bp_sys'] = row['SYS(mmHg)']
        truncated_df['bp_dia'] = row['DIA(mmHg)']
        truncated_df['subject'] = row['subject']
        windows.append(truncated_df)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.
    sub = pd.concat(windows) if windows else pd.DataFrame()
    if verbose:
        print(sub.head())
        print(sub.describe())
        print(sub.shape)
    return sub

def get_folders(my_path):
    """Return the names (not full paths) of the directories directly inside ``my_path``."""
    folder_names = []
    for entry in os.listdir(my_path):
        # Plain files are ignored; only sub-directories are reported.
        if os.path.isdir(os.path.join(my_path, entry)):
            folder_names.append(entry)
    return folder_names

In [None]:
def ppg_feature_list(data, normalised=False, verbose=False):
    """Build one record per blood-pressure measurement from labelled PPG samples.

    ``data`` is grouped by its ``utc`` column. A group is skipped when its raw
    ``bvp`` column has no truthy (non-zero) value. Every remaining group
    becomes a dict with the keys ``patientid``, ``sbp``, ``dbp`` and ``ppg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``utc``, ``bvp``, ``bp_sys``, ``bp_dia`` and
        ``subject``, plus ``bvp_normalised`` when ``normalised`` is true.
    normalised : bool, optional
        Take the ``ppg`` values from ``bvp_normalised`` instead of ``bvp``.
    verbose : bool, optional
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    list of dict
        Records in group order.
    """
    signal_column = 'bvp_normalised' if normalised else 'bvp'
    records = []
    for _, window in data.groupby(['utc']):
        # The emptiness check always looks at the raw signal, even when the
        # normalised column is the one being exported.
        if not window['bvp'].any():
            continue
        records.append({
            'patientid': window['subject'].unique()[0],
            'sbp': int(window['bp_sys'].unique()[0]),
            'dbp': int(window['bp_dia'].unique()[0]),
            'ppg': window[signal_column].tolist(),
        })
    return records

### Motion Sections Removal and Normalization

In [None]:
def remove_motion_sections(df, limit=100, min_size=5, padding=15, std_mult=0.25):
    """Drop the rows that belong to sections of unusual acceleration magnitude.

    A row is flagged when its ``acc_mag`` lies outside
    ``mean +/- std_mult * std`` of the whole column. Flagged rows whose
    positions are at most ``limit`` rows apart are merged into one section.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``acc_mag`` column.
    limit : int, optional
        Largest positional gap (in rows) between two flagged rows that still
        belong to the same section.
    min_size : int, optional
        Minimum positional extent (in rows) a non-final section needs in
        order to be removed.
    padding : int, optional
        Extra rows removed on each side of a non-final section.
    std_mult : float, optional
        Width of the accepted band, as a multiple of the standard deviation.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when no row is flagged, otherwise a new frame without
        the rows of the detected sections.
    """
    # Todo check frequency domain for high frequencies
    acc_mag = df['acc_mag'].values
    acc_mag_mean = df['acc_mag'].mean()
    acc_mag_std = df['acc_mag'].std()
    # Comparison with overall mean and std
    outside_band = ((acc_mag > acc_mag_mean + std_mult * acc_mag_std) |
                    (acc_mag < acc_mag_mean - std_mult * acc_mag_std))
    # flatnonzero always yields a 1-d array; np.squeeze(np.argwhere(...))
    # collapsed to a 0-d array for a single hit and broke the indexing below.
    thresh_indices = np.flatnonzero(outside_band)
    if thresh_indices.size == 0:
        return df

    section_indices = []
    section_start = thresh_indices[0]
    # Walk every gap, including the last one, so an isolated trailing outlier
    # does not stretch the final section across clean data.
    for i in range(1, len(thresh_indices)):
        if thresh_indices[i] - thresh_indices[i-1] > limit:
            if thresh_indices[i-1] >= section_start + min_size:
                # Clamp at 0: a negative start would make the slice wrap around
                # to the end of the frame and remove nothing.
                section_indices.append((max(section_start - padding, 0), thresh_indices[i-1] + padding))
            section_start = thresh_indices[i]
    # NOTE(review): the trailing section is removed without padding or the
    # min_size check, and its end is exclusive - kept as in the original;
    # confirm whether it should follow the same rules as the other sections.
    if thresh_indices[-1] != section_start:
        section_indices.append((section_start, thresh_indices[-1]))

    # Remove by position with a mask: dropping by index label would also remove
    # unrelated rows whenever the index contains duplicate labels.
    keep = np.ones(len(df), dtype=bool)
    for start, end in section_indices:
        keep[start:end] = False
    return df[keep].copy()

def apply_filters(df):
    """Add a min-max scaled copy of ``bvp`` as the column ``bvp_normalised``.

    The column is written onto ``df`` itself and the same frame is returned.
    No smoothing is applied; the original author considered it unnecessary
    because of the relatively low sampling frequency.
    """
    bvp = df['bvp']
    lowest = bvp.min()
    highest = bvp.max()
    df['bvp_normalised'] = (bvp - lowest) / (highest - lowest)
    return df

In [None]:
# Load the project configuration; the path is relative to the notebook's
# working directory.
with open('../../config.json') as f:
    config = json.load(f)

# Date stamp (YYYY-MM-DD) used later as a folder name for the exported features.
today = datetime.datetime.today().strftime('%Y-%m-%d')
# Root folder of the recordings, taken from the 'hype' entry of the config;
# later cells expect <root>/<date>/<subject>/... below it.
exp_base_path = config['hype']
print(exp_base_path)

# Generate JSON for One Patient - Biking

In [None]:
# Placeholders: replace with the recording date folder and the patient folder
# to process before running this cell.
exp_date = 'yyyy-mm-dd'
exp_patient = 'patient_id'
# Used below as output folder names; `filtering` is also passed to bp.set_window.
experiment = 'biking'
filtering = 'bfill'

patient_base_path = os.path.join(exp_base_path, exp_date, exp_patient)
print(patient_base_path)
    
# Sources
# One entry per device, resolved by folder/file name prefix. `.pop()` takes the
# last glob match and raises IndexError when nothing matches.
sources = {
            'tag' : glob.glob(patient_base_path+r'/Tag*').pop(),
            'faros' : glob.glob(patient_base_path+r'/Faros*').pop(),
            'empatica' : glob.glob(patient_base_path+r'/Empatica*').pop(),
            'spacelabs' : glob.glob(patient_base_path+r'/*SpaceLabs*').pop(),
          }

In [None]:
# Read the Empatica recording; later cells use the `bvp` and `acc_mag`
# columns of `empatica.data`.
empatica = devicely.EmpaticaReader(sources['empatica'])
empatica.data.head()

In [None]:
# Locate the first .abp file inside the SpaceLabs folder.
# Start from None so a missing folder or a folder without an .abp file is
# reported clearly instead of leaving `spacelabsfile` undefined (or stale from
# an earlier run of this cell).
spacelabsfile = None
if os.path.exists(sources['spacelabs']):
    for file in os.listdir(sources['spacelabs']):
        if file.endswith(".abp"):
            spacelabsfile = os.path.join(sources['spacelabs'], file)
            break
if spacelabsfile is None:
    raise FileNotFoundError("No .abp file found in " + str(sources['spacelabs']))
print(spacelabsfile)

In [None]:
# Read the blood-pressure file. The second argument (2) is the value the batch
# cells further down name `timeshift` ("for converting bp_time to utc").
bp = devicely.SpacelabsReader(spacelabsfile, 2)
bp.data

In [None]:
# Window length written as "<amount> <unit>", where <unit> must be a valid
# datetime.timedelta keyword (e.g. 'seconds', 'minutes').
time_delta = '30 seconds'
time_delta_modified = time_delta.split(' ')
# '30 seconds' -> {'seconds': 30}
time_delta_dict = {time_delta_modified[1]: int(time_delta_modified[0])}

# Later cells read `window_start` / `window_end` from bp.data after this call.
bp.set_window(datetime.timedelta(**time_delta_dict), filtering)
bp.data

In [None]:
# Blood-pressure table for the next steps: without the 'error' and 'z'
# columns, with the index turned into a regular column, and with the
# 'datetime' column renamed to 'utc'.
bp_df = (
    bp.data
    .drop(columns=['error','z'])
    .reset_index()
    .rename(columns={"datetime": "utc"})
)
bp_df

In [None]:
# Keep only the PPG signal and the acceleration magnitude; rows where both
# values are missing are dropped.
subset_empatica = empatica.data[['bvp','acc_mag']].dropna(how='all')
# Cut the signal down to the blood-pressure windows and label every sample
# with its measurement.
sub_data_empatica = truncate_empatica(subset_empatica, bp_df, verbose=False)

In [None]:
# Preview the labelled samples.
sub_data_empatica.head()

In [None]:
# Optional filtering: first remove high-motion sections, then add the
# min-max normalised signal.
sub_data_empatica_motionless = remove_motion_sections(sub_data_empatica)
# apply_filters writes the `bvp_normalised` column onto the frame it receives
# and returns that same frame, so both names below refer to one object.
sub_data_empatica_filtered = apply_filters(sub_data_empatica_motionless)

In [None]:
# One record per blood-pressure measurement; normalised=True exports the
# `bvp_normalised` column as the 'ppg' values.
features = ppg_feature_list(sub_data_empatica_filtered, normalised=True, verbose=False)
print(len(features))

In [None]:
# Output folder: <features root>/<today>/<experiment>/<filtering>/<window>.
features_path = os.path.join('../../features/hype-json', today, experiment, filtering, time_delta.replace(' ',''))
# exist_ok=True creates the folder only when needed, without the separate
# exists() check that could race with another process creating it.
os.makedirs(features_path, exist_ok=True)

In [None]:
# Write this patient's records to
# <features_path>/<subject>_ppg_feature_list_<window>.json
single_patient_id = str(bp_df['subject'].unique()[0])
single_patient_json = os.path.join(
    features_path,
    single_patient_id + '_ppg_feature_list_' + time_delta.replace(' ','') + '.json',
)
with open(single_patient_json, 'w') as f:
    json.dump(features, f)

# Generate JSON for All Patients

# Biking

In [None]:
# Set Blood Pressure Monitor and Window
bp_monitor = 'spacelabs'
time_delta = '30 seconds'
timeshift = 2 # for converting bp_time to utc
today = datetime.datetime.today().strftime('%Y-%m-%d')
verbose = False
normalised = True
motionless = True
experiment = 'biking'
filtering = 'bfill'

# Only the SpaceLabs monitor is handled below. Fail early instead of silently
# reusing a `bp_df` left over from another cell.
if bp_monitor != 'spacelabs':
    raise ValueError("Unsupported bp_monitor: " + str(bp_monitor))

# '30 seconds' -> {'seconds': 30}; the unit must be a datetime.timedelta keyword.
time_delta_modified = time_delta.split(' ')
time_delta_dict = {time_delta_modified[1]: int(time_delta_modified[0])}

# The output folder depends only on the settings above, so it is resolved once
# here. The combined file written after the loop can then never land in a
# folder left over from another cell.
if normalised:
    directory = 'normalised-motionless' if motionless else 'normalised-motion'
elif motionless:
    directory = 'not-normalised-motionless'
else:
    directory = 'raw'
features_path = os.path.join('../../features/hype-json', today, experiment, filtering, time_delta.replace(' ',''), directory)

dates = get_folders(exp_base_path)
all_features = []

for date in dates:
    print(date)
    subjects = get_folders(os.path.join(exp_base_path, date))
    for subject in subjects:
        print(subject)
        patient_base_path = os.path.join(exp_base_path, date, subject)
        tag = os.path.join(patient_base_path, 'Tags')

        # Check for the Tags. `continue` skips only this subject; the previous
        # `break` also dropped every remaining subject of the same date.
        if not os.path.exists(tag):
            print("No Tag File.")
            print('-----','\n')
            continue

        # Sources: last glob match per device (as `.pop()` returned), or None
        # when the device folder is missing.
        sources = {}
        for source_name, pattern in (('tag', r'/Tag*'),
                                     ('faros', r'/Faros*'),
                                     ('empatica', r'/Empatica*'),
                                     ('spacelabs', r'/*SpaceLabs*')):
            matches = glob.glob(patient_base_path + pattern)
            sources[source_name] = matches[-1] if matches else None

        if sources['empatica'] is None:
            print("Subject has no empatica file")
            print('-----','\n')
            continue

        # Read Spacelabs. Reset per subject so a file found for an earlier
        # subject is never reused.
        spacelabs_file = None
        if sources['spacelabs'] is not None and os.path.isdir(sources['spacelabs']):
            for file in os.listdir(sources['spacelabs']):
                if file.endswith(".abp"):
                    spacelabs_file = os.path.join(sources['spacelabs'], file)
                    break
        if spacelabs_file is None:
            print("Subject has no spacelabs file")
            print('-----','\n')
            continue

        bp = devicely.SpacelabsReader(spacelabs_file, timeshift)
        bp.set_window(datetime.timedelta(**time_delta_dict), filtering)

        # Adjust columns
        bp_df = bp.data.drop(['error','z'], axis=1).reset_index().copy()
        bp_df.rename(columns={"datetime": "utc"}, inplace=True)
        if verbose: print(bp_df.head(1))

        # Read Empatica
        empatica = devicely.EmpaticaReader(sources['empatica'])
        subset_empatica = empatica.data[['bvp','acc_mag']].dropna(how='all').copy()
        if verbose: print(subset_empatica.head(1))

        # Extract Features
        data_empatica = truncate_empatica(subset_empatica, bp_df, verbose=verbose)
        print("Truncated data shape: ", data_empatica.shape)
        # Remove Motion and Apply filters
        if motionless:
            data_empatica = remove_motion_sections(data_empatica)
            print("Montionless data shape: ", data_empatica.shape)
        if normalised:
            data_empatica = apply_filters(data_empatica)
            print("Normalised bvp mean: ", data_empatica['bvp_normalised'].mean())
        # Get PPG values for each bp pair
        features = ppg_feature_list(data_empatica, normalised=normalised, verbose=verbose)

        if not features:
            print('Subject has no features.')
            print('-----','\n')
            continue
        print("Number of features:", len(features))

        # Create features path if it not exists
        os.makedirs(features_path, exist_ok=True)

        all_features.extend(features)

        with open(os.path.join(features_path, str(bp_df['subject'].unique()[0])+'_ppg_feature_list_'+time_delta.replace(' ','')+'.json'), 'w') as f:
            json.dump(features, f)
        if verbose:
            print(features)

        print('-----','\n')

print("Total of pairs bp/ppg: ", len(all_features))
# Written only when at least one subject produced features; in that case the
# output folder already exists and an earlier combined file is not replaced by
# an empty one.
if all_features:
    with open(os.path.join(features_path,'all_features_'+time_delta.replace(' ','')+'.json'), 'w') as f:
        json.dump(all_features, f)

# 24 Hours

In [None]:
# Set Blood Pressure Monitor and Window
bp_monitor = 'spacelabs'
time_delta = '30 seconds'
timeshift = 2 # for converting bp_time to utc
today = datetime.datetime.today().strftime('%Y-%m-%d')
verbose = False
normalised = True
motionless = True
experiment = '24hours'
filtering = 'bfill'

# Only the SpaceLabs monitor is handled below. Fail early instead of silently
# reusing a `bp_df` left over from another cell.
if bp_monitor != 'spacelabs':
    raise ValueError("Unsupported bp_monitor: " + str(bp_monitor))

# '30 seconds' -> {'seconds': 30}; the unit must be a datetime.timedelta keyword.
time_delta_modified = time_delta.split(' ')
time_delta_dict = {time_delta_modified[1]: int(time_delta_modified[0])}

# The output folder depends only on the settings above, so it is resolved once
# here. The combined file written after the loop can then never land in a
# folder left over from another cell (e.g. the biking export).
if normalised:
    directory = 'normalised-motionless' if motionless else 'normalised-motion'
elif motionless:
    directory = 'not-normalised-motionless'
else:
    directory = 'raw'
features_path = os.path.join('../../features/hype-json', today, experiment, filtering, time_delta.replace(' ',''), directory)

dates = get_folders(exp_base_path)
all_features = []

for date in dates:
    print(date)
    subjects = get_folders(os.path.join(exp_base_path, date))
    for subject in subjects:
        print(subject)
        # The 24-hour recordings live in a '24 hours' sub-folder of the subject.
        patient_base_path = os.path.join(exp_base_path, date, subject, '24 hours')

        # Sources: last glob match per device (as `.pop()` returned), or None
        # when the device folder is missing - including the case where the
        # subject has no '24 hours' folder at all.
        sources = {}
        for source_name, pattern in (('faros', r'/Faros*'),
                                     ('empatica', r'/Empatica*'),
                                     ('spacelabs', r'/*SpaceLabs*')):
            matches = glob.glob(patient_base_path + pattern)
            sources[source_name] = matches[-1] if matches else None

        # `continue` skips only this subject; the previous `break` also dropped
        # every remaining subject of the same date.
        if sources['empatica'] is None:
            print("Subject has no empatica file")
            print('-----','\n')
            continue

        # Read Spacelabs. Reset per subject so a file found for an earlier
        # subject is never reused.
        spacelabs_file = None
        if sources['spacelabs'] is not None and os.path.isdir(sources['spacelabs']):
            for file in os.listdir(sources['spacelabs']):
                if file.endswith(".abp"):
                    spacelabs_file = os.path.join(sources['spacelabs'], file)
                    break
        if spacelabs_file is None:
            print("Subject has no spacelabs file")
            print('-----','\n')
            continue

        bp = devicely.SpacelabsReader(spacelabs_file, timeshift)
        bp.set_window(datetime.timedelta(**time_delta_dict), filtering)

        # Adjust columns
        bp_df = bp.data.drop(['error','z'], axis=1).reset_index().copy()
        bp_df.rename(columns={"datetime": "utc"}, inplace=True)
        if verbose: print(bp_df.head(1))

        # Read Empatica
        empatica = devicely.EmpaticaReader(sources['empatica'])
        subset_empatica = empatica.data[['bvp','acc_mag']].dropna(how='all').copy()
        if verbose: print(subset_empatica.head(1))

        # Extract Features
        data_empatica = truncate_empatica(subset_empatica, bp_df, verbose=verbose)
        print("Truncated data shape: ", data_empatica.shape)
        # Remove Motion and Apply filters
        if motionless:
            data_empatica = remove_motion_sections(data_empatica)
            print("Montionless data shape: ", data_empatica.shape)
        if normalised:
            data_empatica = apply_filters(data_empatica)
            print("Normalised bvp mean: ", data_empatica['bvp_normalised'].mean())
        # Get PPG values for each bp pair
        features = ppg_feature_list(data_empatica, normalised=normalised, verbose=verbose)

        if not features:
            print('Subject has no features.')
            print('-----','\n')
            continue
        print("Number of features:", len(features))

        # Create features path if it not exists
        os.makedirs(features_path, exist_ok=True)

        all_features.extend(features)

        with open(os.path.join(features_path, str(bp_df['subject'].unique()[0])+'_ppg_feature_list_'+time_delta.replace(' ','')+'.json'), 'w') as f:
            json.dump(features, f)
        if verbose:
            print(features)

        print('-----','\n')

print("Total of pairs bp/ppg: ", len(all_features))
# Written only when at least one subject produced features; in that case the
# output folder already exists and an earlier combined file is not replaced by
# an empty one.
if all_features:
    with open(os.path.join(features_path,'all_features_'+time_delta.replace(' ','')+'.json'), 'w') as f:
        json.dump(all_features, f)