In [None]:
import os
import glob
import math
import json
import time
import random
import datetime
import numpy as np
import pandas as pd

# Graphs
import seaborn as sns
import matplotlib.pyplot as plt

# Signal Processing
import scipy.stats as stats
import scipy.signal as sig

# Sklearn
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import Pipeline

# Custom Package
import devicely

# Custom Model
import lightgbm as lgb

In [None]:
def get_folders(my_path):
    """Return the names of the immediate sub-directories of ``my_path``.

    Plain files are ignored. The names are returned in the order produced by
    ``os.listdir``, i.e. they are not sorted.
    """
    folder_names = []
    for entry in os.listdir(my_path):
        if os.path.isdir(os.path.join(my_path, entry)):
            folder_names.append(entry)
    return folder_names
    
def truncate_empatica(data_empatica, bp_df, verbose=False):
    """Keep only the Empatica samples that fall inside the blood-pressure windows.

    Parameters
    ----------
    data_empatica : pandas.DataFrame
        Time-indexed Empatica samples. The index must be sorted because each
        window is cut out with ``DataFrame.truncate``.
    bp_df : pandas.DataFrame
        One row per blood-pressure measurement, with ``window_start`` and
        ``window_end`` columns delimiting the window to keep.
    verbose : bool
        If True, plot every window and print/plot a summary of the result
        (the plots use the ``bvp`` and ``acc_mag`` columns).

    Returns
    -------
    pandas.DataFrame
        The windows stacked in the row order of ``bp_df``. Overlapping windows
        contribute their shared samples more than once. An empty frame is
        returned when ``bp_df`` has no rows.
    """
    windows = []
    for _, row in bp_df.iterrows():
        truncated_df = data_empatica.truncate(before=row['window_start'], after=row['window_end'])
        if verbose:
            truncated_df[['bvp', 'acc_mag']].interpolate(method='index').plot()
        windows.append(truncated_df)

    # Concatenate once instead of growing the frame window by window:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic anyway.
    # pd.concat raises on an empty list, hence the explicit fallback.
    sub = pd.concat(windows) if windows else pd.DataFrame()

    if verbose:
        print(sub.head())
        print(sub.describe())
        print(sub.shape)
        if not sub.empty:
            sub[['bvp', 'acc_mag']].interpolate(method='index').plot()
    return sub

# Locates the local minima that mark the start of each cardiac cycle
def find_cycle_starts(df, sample_rate=64):
    """Return the positions of the local minima of ``df``.

    The minima are found as peaks of the inverted signal and must lie at
    least ``0.7 * sample_rate`` samples apart.
    """
    min_gap = 0.7 * sample_rate
    inverted = -df.values
    minima, _ = sig.find_peaks(inverted, distance=min_gap)
    return minima

# For a target amplitude, returns the index labels of the closest sample before and after the systolic peak
def find_xs_for_y(ys, y_val, sys_peak):
    """Find the samples closest to ``y_val`` on either side of ``sys_peak``.

    ``ys`` is sliced at ``sys_peak``: the first result is the index label of
    the sample closest to ``y_val`` before the slice point, the second the
    label of the closest sample from the slice point onwards. Ties resolve to
    the first occurrence.
    """
    distance_to_target = (ys - y_val).abs()
    before_peak = distance_to_target[:sys_peak]
    from_peak = distance_to_target[sys_peak:]
    return before_peak.idxmin(), from_peak.idxmin()

# Drops the feature rows that are outliers caused by inaccuracies in the signal
def clean_window_features_of_outliers(df):
    """Remove rows whose feature values exceed twice the 80 % quantile.

    The quantiles are computed once on the incoming frame. Columns whose name
    contains ``'ts'`` (the timestamp columns) are not used for filtering.
    A row survives only if every other column is strictly below twice that
    column's 80 % quantile, so rows holding NaN in such a column are dropped
    as well.
    """
    upper_quantiles = df.quantile(0.8)
    feature_columns = [name for name in df.columns if 'ts' not in name]
    for name in feature_columns:
        limit = upper_quantiles[name] * 2
        df = df[df[name] < limit]
    return df

# Finds sections with high acceleration magnitude and removes them
def remove_motion_sections(df, limit=100, min_size=5, padding=15, std_mult=0.25):
    """Remove the rows recorded while the wearer was moving.

    A sample counts as "motion" when its ``acc_mag`` lies more than
    ``std_mult`` standard deviations away from the overall mean. Motion samples
    that are at most ``limit`` rows apart are merged into one section. Every
    section spanning at least ``min_size`` rows is removed together with
    ``padding`` rows on either side.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``acc_mag`` column.
    limit : int
        Largest positional gap between two motion samples of the same section.
    min_size : int
        Minimum extent (last position - first position) for a section to be
        removed; shorter sections are kept.
    padding : int
        Number of rows removed before and after each section.
    std_mult : float
        Width of the accepted band around the mean, in standard deviations.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the motion sections; ``df`` itself when there is
        nothing to remove.
    """
    acc_mag = df['acc_mag'].to_numpy()
    acc_mag_mean = df['acc_mag'].mean()
    tolerance = std_mult * df['acc_mag'].std()

    # Comparison with overall mean and std. flatnonzero always yields a 1-D
    # array, also for zero or one hit (squeeze(argwhere(...)) does not).
    outside_band = (acc_mag > acc_mag_mean + tolerance) | (acc_mag < acc_mag_mean - tolerance)
    thresh_indices = np.flatnonzero(outside_band)
    if thresh_indices.size == 0:
        return df

    # Start a new section wherever two consecutive hits are more than `limit` rows apart.
    section_breaks = np.flatnonzero(np.diff(thresh_indices) > limit) + 1

    keep = np.ones(len(df), dtype=bool)
    for section in np.split(thresh_indices, section_breaks):
        first, last = section[0], section[-1]
        if last - first < min_size:
            continue
        # Clamp the padded range to the frame; a negative start would
        # otherwise be interpreted as counting from the end.
        start = max(first - padding, 0)
        stop = min(last + padding + 1, len(df))
        keep[start:stop] = False

    if keep.all():
        return df
    # Positional mask instead of label-based drop: safe with duplicated index labels.
    return df[keep]

def _average_cycle_template(signal, cycle_starts, template_length):
    """Average the cycles sample by sample: entry i is the mean of the samples i steps after each start."""
    return [signal.iloc[cycle_starts + offset].mean() for offset in range(template_length)]


def find_clean_cycles_with_template(signal, verbose=False):
    """Detect cardiac cycles in ``signal`` and keep those that resemble the average cycle.

    Steps:
      1. Cycle starts are the local minima returned by ``find_cycle_starts``.
      2. A template is built by averaging all cycles over the median cycle
         length. If more than half of the cycles (and more than one) correlate
         with it at >= 0.8, the template is rebuilt from those cycles only.
      3. A cycle is kept when both signal-quality indices are >= 0.8:
         SQI1, the Pearson correlation of the template with the cycle cut to
         template length, and SQI2, the Pearson correlation of the template
         with the whole cycle re-sampled to template length.

    Parameters
    ----------
    signal : pandas.Series
        The (filtered) PPG signal. All positions below are integer positions
        into this series, not index labels.
    verbose : bool
        If True, plot the template and the accepted cycles; the latter figure
        is also written below the ``figures`` directory named in
        ``../../config.json``.

    Returns
    -------
    list of (start, end) tuples
        Positional boundaries of the accepted cycles. ``end`` is the next
        detected cycle start, or ``start + template_length`` when the cycle is
        more than 20 % longer than the template. Empty when fewer than two
        cycle starts are found.
    """
    initial_cycle_starts = find_cycle_starts(signal)
    if len(initial_cycle_starts) <= 1:
        return []
    template_length = math.floor(np.median(np.diff(initial_cycle_starts)))

    # The last detected start has no following start that would close its cycle.
    cycle_starts = initial_cycle_starts[:-1]
    # Drop trailing cycles that are too short to cover a full template.
    while len(cycle_starts) > 0 and cycle_starts[-1] + template_length > len(signal):
        cycle_starts = cycle_starts[:-1]
    if len(cycle_starts) == 0:
        return []

    # Maps every cycle start to the start of the following cycle.
    next_start = dict(zip(initial_cycle_starts[:-1], initial_cycle_starts[1:]))

    template = _average_cycle_template(signal, cycle_starts, template_length)

    corr_coef = [
        np.corrcoef(template, signal.iloc[cycle_start:cycle_start + template_length])[0, 1]
        for cycle_start in cycle_starts
    ]
    valid_indices = np.flatnonzero(np.array(corr_coef) >= 0.8)
    if (len(valid_indices) > len(cycle_starts) / 2) and len(valid_indices) > 1:
        # Enough good cycles: refine the template using only those.
        cycle_starts = cycle_starts[valid_indices]
        template = _average_cycle_template(signal, cycle_starts, template_length)

    if verbose:
        print('Cycle Template')
        plt.plot(template)
        plt.show()

    # Check correlation of cycles with template
    sqi1_corr = []
    sqi2_corr = []
    for cycle_start in cycle_starts:
        # SQI1: Pearson correlation with the cycle cut to template length
        fixed_length_cycle = signal.iloc[cycle_start:cycle_start + template_length]
        corr, _ = stats.pearsonr(template, fixed_length_cycle)
        sqi1_corr.append(corr)

        # SQI2: Pearson correlation with the whole cycle, re-sampled to template length
        whole_cycle = signal.iloc[cycle_start:next_start[cycle_start]].to_numpy()
        corr, _ = stats.pearsonr(template, sig.resample(whole_cycle, template_length))
        sqi2_corr.append(corr)

    # Filter for correlation >= 0.8 (NaN correlations compare False and are rejected)
    corrs = np.array([sqi1_corr, sqi2_corr]).transpose()
    cycle_starts = cycle_starts[np.all(corrs >= 0.8, axis=1)]

    if verbose:
        print('Detected Valid Cycles')
        fig = plt.figure(figsize=(12, 10), dpi=300)
        plt.rcParams.update({'font.size': 16})
        for cycle_start in cycle_starts:
            plt.plot(signal.iloc[cycle_start:cycle_start + template_length].to_numpy())

        # Save valid cycles
        with open('../../config.json') as f:
            config = json.load(f)
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        figure_path = config['figures']

        millis = int(round(time.time() * 1000))
        valid_cycles = os.path.join(figure_path, today, 'valid_cycles_hype')
        os.makedirs(valid_cycles, exist_ok=True)
        fig.savefig(os.path.join(valid_cycles, str(millis) + '_valid_cycles_hype.png'))
        plt.show()
        # One figure is created per window; release it so they do not pile up in memory.
        plt.close(fig)

    cycles = []
    for cycle_start in cycle_starts:
        cycle_end = next_start[cycle_start]
        # Over-long cycles (e.g. a missed minimum) are cut back to template length.
        if (cycle_end - cycle_start) > template_length * 1.2:
            cycle_end = cycle_start + template_length
        cycles.append((cycle_start, cycle_end))

    return cycles

# Extracts the time-domain features of one cardiac cycle and appends them as a new row
def extract_features_for_cycle(window_df, signal, verbose=False):
    """Compute the features of a single cardiac cycle.

    The cycle is up-sampled to millisecond resolution, shifted so its minimum
    is zero, and its first local maximum is taken as the systolic peak.

    Features written to the new row:
      * ``start_ts`` / ``sys_peak_ts``: timestamps of the cycle start and the peak
      * ``T_S`` / ``T_D``: seconds from start to peak and from peak to end
      * for p in 10, 25, 33, 50, 66, 75 (% of the maximum amplitude):
        ``DW_p`` (seconds from the peak to the closest sample after it),
        ``DW_SW_sum_p`` (seconds between the closest samples before and after
        the peak) and ``DW_SW_ratio_p`` (time after the peak divided by time
        before the peak)

    Parameters
    ----------
    window_df : pandas.DataFrame
        Features of the cycles processed so far (may be empty).
    signal : pandas.Series
        Samples of one cycle; ``resample`` requires a datetime-like index.
    verbose : bool
        If True, plot the cycle, its peaks and the amplitude crossings.

    Returns
    -------
    pandas.DataFrame
        ``window_df`` with one additional row, indexed one above the current
        maximum index (0 for an empty frame). If the cycle has no local
        maximum, an EMPTY frame is returned; callers have to treat this as
        "no features for this cycle" and keep their own copy of ``window_df``.
    """
    has_rows = len(window_df.index) > 0
    cur_index = window_df.index.max() + 1 if has_rows else 0

    signal = signal.resample('ms').nearest(limit=1).interpolate(method='time')
    signal = signal - signal.min()
    max_amplitude = signal.max()

    peaks = sig.find_peaks(signal.values)[0]
    if len(peaks) == 0:
        return pd.DataFrame()
    sys_peak = peaks[0]
    sys_peak_ts = signal.index[sys_peak]
    cycle_start_ts = signal.index.min()
    cycle_end_ts = signal.index.max()

    if verbose:
        plt.figure()
        plt.xlim((cycle_start_ts, cycle_end_ts))
        # peaks are integer positions, so they must go through .iloc
        plt.scatter(signal.index[peaks], signal.iloc[peaks])
        plt.plot(signal.index, signal.values)

    # Features
    features = {
        'start_ts': cycle_start_ts,
        'sys_peak_ts': sys_peak_ts,
        'T_S': (sys_peak_ts - cycle_start_ts).total_seconds(),
        'T_D': (cycle_end_ts - sys_peak_ts).total_seconds(),
    }
    for p in [10, 25, 33, 50, 66, 75]:
        p_ampl = p / 100 * max_amplitude
        x1, x2 = find_xs_for_y(signal, p_ampl, sys_peak)
        if verbose:
            plt.scatter([x1, x2], signal.loc[[x1, x2]])
        features['DW_' + str(p)] = (x2 - sys_peak_ts).total_seconds()
        features['DW_SW_sum_' + str(p)] = (x2 - x1).total_seconds()
        features['DW_SW_ratio_' + str(p)] = (x2 - sys_peak_ts) / (sys_peak_ts - x1)
    if verbose:
        plt.show()

    # Build the complete row first and attach it in one step
    # (DataFrame.append no longer exists in pandas 2.x).
    new_row = pd.DataFrame(features, index=[cur_index])
    if not has_rows:
        return new_row
    return pd.concat([window_df, new_row], sort=False)
    
def extract_features_for_window(df, verbose=False):
    """Extract per-cycle features for one measurement window.

    Clean cycles are detected in ``df['bvp_filtered']``, the features of each
    cycle are computed with ``extract_features_for_cycle``, the cardiac period
    ``CP`` (seconds between the systolic peaks of two consecutive feature
    rows) is added to the earlier of the two rows, and outlier rows are
    removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Window of the recording with a ``bvp_filtered`` column.
    verbose : bool
        Forwarded to the helpers; additionally prints the feature table.

    Returns
    -------
    pandas.DataFrame
        One row per accepted cycle; empty when no cycle yields features.
    """
    signal = df['bvp_filtered']
    cycles = find_clean_cycles_with_template(signal, verbose=verbose)
    if len(cycles) == 0:
        return pd.DataFrame()

    window_features = pd.DataFrame()
    for cycle_start, cycle_end in cycles:
        updated = extract_features_for_cycle(window_features,
                                             signal.iloc[cycle_start:cycle_end],
                                             verbose=verbose)
        # A cycle without a systolic peak adds no row. Keep what has been
        # collected so far instead of replacing it with the returned frame.
        if len(updated) <= len(window_features):
            continue
        window_features = updated

        # Take the row labels from the frame itself so they cannot drift
        # from a separately maintained counter.
        new_index = window_features.index.max()
        previous_index = new_index - 1
        if previous_index in window_features.index:
            peak_distance = (window_features.loc[new_index, 'sys_peak_ts']
                             - window_features.loc[previous_index, 'sys_peak_ts'])
            window_features.loc[previous_index, 'CP'] = peak_distance.total_seconds()

    if verbose:
        print('Cycle Features within Window:')
        print(window_features)
    if window_features.empty:
        return window_features
    window_features = clean_window_features_of_outliers(window_features)
    return window_features

def apply_filter(df, filter_type='norm', fs=64):
    """Add a ``bvp_filtered`` column to ``df`` and return ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``bvp`` column. It is modified in place.
    filter_type : {'norm', 'cheby', 'butter'}
        * ``'norm'``: min-max normalisation of ``bvp``.
        * ``'cheby'``: min-max normalisation followed by a zero-phase 4th-order
          Chebyshev type II band-pass (0.5-8 Hz, 20 dB stop-band attenuation).
        * ``'butter'``: min-max normalisation followed by a zero-phase
          4th-order Butterworth band-pass (0.5-8 Hz).
        For ``'cheby'`` and ``'butter'`` the ``bvp`` column itself is
        overwritten with the normalised signal.
    fs : float
        Sampling frequency of ``bvp`` in Hz.

    Returns
    -------
    pandas.DataFrame
        ``df`` including the ``bvp_filtered`` column.

    Raises
    ------
    ValueError
        If ``filter_type`` is not one of the supported names (only checked
        when the signal is long enough to be filtered).
    """
    # Signals of 27 samples or fewer are passed through unfiltered.
    # NOTE(review): 27 appears to be the default padlen of sosfiltfilt for
    # these 4-section filters, which needs strictly more samples - confirm.
    if len(df['bvp']) <= 27:
        df['bvp_filtered'] = df['bvp']
        return df

    if filter_type not in ('norm', 'cheby', 'butter'):
        raise ValueError("Unknown filter_type: {!r} (expected 'norm', 'cheby' or 'butter')".format(filter_type))

    normalised = (df['bvp'] - df['bvp'].min()) / (df['bvp'].max() - df['bvp'].min())

    if filter_type == 'norm':
        # No smoothing necessary due to relatively low sampling frequency
        df['bvp_filtered'] = normalised
        return df

    if filter_type == 'cheby':
        sos = sig.cheby2(4, 20, [0.5, 8], btype='bandpass', fs=fs, output='sos')
    else:
        sos = sig.butter(4, [0.5, 8], btype='bandpass', fs=fs, output='sos')
    df['bvp'] = normalised
    df['bvp_filtered'] = sig.sosfiltfilt(sos, df['bvp'])
    return df

def extract_features_for_signal(signal, bp, fs, verbose=False):
    """Attach aggregated PPG features to every blood-pressure measurement.

    For each row of ``bp`` the samples of ``signal`` between ``window_start``
    and ``window_end`` are cut out and per-cycle features are extracted. The
    mean and variance of every feature (columns whose name contains ``'ts'``
    are skipped) are stored as ``<feature>_mean`` / ``<feature>_var``.

    Parameters
    ----------
    signal : pandas.DataFrame
        Time-indexed recording containing the ``bvp_filtered`` column.
    bp : pandas.DataFrame
        Measurements with ``window_start`` / ``window_end`` columns. The frame
        is not modified; the result is a copy.
    fs : int
        Sampling frequency; windows with ``fs`` rows or fewer are skipped.
    verbose : bool
        Forwarded to ``extract_features_for_window``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``bp`` with the feature columns added. Rows containing any
        missing value, including measurements for which no features could
        be computed, are dropped.
    """
    # Work on a copy: the caller usually passes a filtered slice of another
    # frame, and writing into it would also change the caller's data.
    bp = bp.copy()
    for index, row in bp.iterrows():
        window_df = signal.truncate(before=row['window_start'], after=row['window_end'])
        if window_df.empty or window_df.shape[0] <= fs:
            continue
        window_features = extract_features_for_window(window_df, verbose)
        for col in window_features.columns:
            if 'ts' not in col:
                bp.loc[index, col + '_mean'] = window_features[col].mean()
                bp.loc[index, col + '_var'] = window_features[col].var()
    return bp.dropna(how='any')

In [None]:
def extract_features(csv=True, bp_monitor='spacelabs', timeshift=2, 
                     time_delta='15 seconds', 
                     time_delta_type='bfill', 
                     experiment_type='biking', 
                     motion_filter=False, special_filter='cheby', verbose=False):
    """Extract PPG features around every blood-pressure measurement of all subjects.

    The recordings are expected below ``config['hype']`` (read from
    ``../../config.json``) as ``<date>/<subject>/`` folders. For every subject
    the Spacelabs measurements are read, the Empatica signal is cut to the
    windows around them, optionally filtered and motion-cleaned, and the
    features are computed.

    Parameters
    ----------
    csv : bool
        If True, write one CSV per subject plus a combined CSV and return the
        path of the combined file. An existing combined file is re-used.
    bp_monitor : str
        Blood-pressure device; only ``'spacelabs'`` is supported.
    timeshift : int
        Hours subtracted from the Spacelabs timestamps.
    time_delta : str
        Window length as ``'<amount> <unit>'`` with a ``datetime.timedelta``
        keyword as unit, e.g. ``'15 seconds'``.
    time_delta_type : str
        Window type passed to ``SpacelabsReader.set_window``.
    experiment_type : str
        ``'biking'`` reads the subject folder itself (and requires a ``Tags``
        entry in it); any other value reads the sub-folder of that name.
    motion_filter : bool
        Remove sections with motion before extracting features.
    special_filter : str or None
        Filter name passed to ``apply_filter``; a falsy value disables filtering.
    verbose : bool
        Print and plot intermediate results.

    Returns
    -------
    str or pandas.DataFrame
        Path of the combined CSV if ``csv`` is true, otherwise the combined
        feature frame. The path is returned even when no features were found
        and therefore no file was written.

    Raises
    ------
    ValueError
        If ``bp_monitor`` is not supported.
    """
    with open('../../config.json') as f:
        config = json.load(f)

    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # '15 seconds' -> {'seconds': 15}, usable as timedelta keyword argument
    time_delta_modified = time_delta.split(' ')
    time_delta_dict = {time_delta_modified[1]: int(time_delta_modified[0])}

    exp_base_path = config['hype']
    figure_path = config['figures']
    
    if verbose:
        print(exp_base_path)
        print(figure_path)
        print('\n')

    dates = get_folders(exp_base_path)

    # Names shared by the output folders and files of this parameter combination
    experiment_name = experiment_type.replace(' ', '')
    run_name = time_delta.replace(' ', '') + '-' + time_delta_type + '-' + str(special_filter).lower()

    # Create features path if it does not exist
    features_path = os.path.join('..', '..', 'features', 'hype', today, experiment_name, run_name)
    if motion_filter:
        motion_path = os.path.join(features_path, 'motion-filtered')
    else:
        motion_path = os.path.join(features_path, 'motion-not-filtered')

    print('Extracting Features: ', today, experiment_name, run_name + '-motion: ' + str(motion_filter))
    os.makedirs(motion_path, exist_ok=True)

    all_features_path = motion_path + '/all_features_{}_{}.csv'.format(experiment_name, run_name)
    if (csv == True) and (os.path.exists(all_features_path)):
        print('Features previously extracted.')
        return all_features_path

    subject_frames = []
    dates.sort()
    for date in dates:
        print(date)
        subjects = get_folders(os.path.join(exp_base_path, date))
        # Numeric order, but keep the folder names untouched so that
        # zero-padded names still resolve to an existing path.
        subjects.sort(key=int)
        for subject in subjects:
            print("Subject: ", subject)
            if experiment_type == 'biking':
                patient_base_path = os.path.join(exp_base_path, date, subject)
                tag = os.path.join(patient_base_path, 'Tags')

                # Check for the Tags
                if not os.path.exists(tag):
                    print('No Tag File.')
                    print('-----', '\n')
                    continue
            else:
                patient_base_path = os.path.join(exp_base_path, date, subject, experiment_type)

            # Sources
            empatica_matches = glob.glob(patient_base_path + r'/Empatica*')
            if not empatica_matches:
                print('No Empatica data.')
                print('-----', '\n')
                continue
            sources = {'empatica': empatica_matches.pop()}

            if bp_monitor == 'spacelabs':
                spacelabs_matches = glob.glob(patient_base_path + r'/*SpaceLabs*')
                if not spacelabs_matches:
                    if verbose: print("Subject has no spacelabs file")
                    continue
                sources['spacelabs'] = spacelabs_matches.pop()

                # Read Spacelabs. Reset the path for every subject so that a
                # missing .abp file cannot fall back to the previous subject's.
                spacelabs_file = None
                for file in os.listdir(sources['spacelabs']):
                    if file.endswith(".abp"):
                        spacelabs_file = os.path.join(sources['spacelabs'], file)
                        break
                if spacelabs_file is None:
                    if verbose: print("Subject has no spacelabs file")
                    continue

                bp = devicely.SpacelabsReader(spacelabs_file)
                bp.drop_EB()
                bp.timeshift(pd.Timedelta(-timeshift, unit='h'))
                bp.set_window(datetime.timedelta(**time_delta_dict), time_delta_type)
                bp.data['subject'] = bp.subject

                # Adjust columns
                bp_df = bp.data.drop(['error', 'z', 'x', 'y'], axis=1).reset_index().copy()
                bp_df = bp_df[bp_df['DIA(mmHg)'] > 10]
                print("BP values: ", bp_df.shape[0])
            else:
                raise ValueError("Unsupported bp_monitor: {!r}".format(bp_monitor))

            if verbose: print(bp_df.head(1))

            # Read Empatica
            empatica = devicely.EmpaticaReader(sources['empatica'])
            subset_empatica = empatica.data[['bvp', 'acc_mag']].dropna(how='all').copy()
            if verbose: print(subset_empatica.head(1))

            # Truncate data
            sub_data_empatica = truncate_empatica(subset_empatica, bp_df, verbose=verbose)
            if verbose: print("Truncated data: ", sub_data_empatica.shape)
            if sub_data_empatica.empty:
                # No signal inside any measurement window: nothing to extract
                print('No features.')
                print('-----', '\n')
                continue

            # Apply filters, e. g. normalise, cheby, butter
            if special_filter:
                sub_data_empatica = apply_filter(sub_data_empatica, special_filter, fs=64)
                if verbose: print("Filtered [" + special_filter + "] mean bvp: ", sub_data_empatica['bvp_filtered'].mean())
            else:
                sub_data_empatica['bvp_filtered'] = sub_data_empatica['bvp']
                if verbose: print("Not filtered.")

            # Filter motion
            if motion_filter:
                sub_data_empatica = remove_motion_sections(sub_data_empatica)
                if verbose: print("Motionless data: ", sub_data_empatica.shape)

            features = extract_features_for_signal(sub_data_empatica, bp_df, fs=64, verbose=verbose)

            if 'T_S_mean' not in features:
                print('No features.')
                print('-----', '\n')
                continue

            if verbose: print('Features: ', features.shape)

            if csv:
                features.to_csv(motion_path + '/features_{}_{}_{}.csv'.format(subject, experiment_name, run_name), index=False)
            if verbose: print(features)

            subject_frames.append(features)

            if verbose: print('-----', '\n')

    # Combine all subjects once (DataFrame.append no longer exists in pandas 2.x)
    all_features = pd.concat(subject_frames) if subject_frames else pd.DataFrame()

    all_features['bp_monitor'] = bp_monitor
    all_features['timeshift'] = timeshift
    all_features['time_delta'] = time_delta
    all_features['time_delta_type'] = time_delta_type
    all_features['experiment_type'] = experiment_type
    all_features['motion_filter'] = motion_filter
    all_features['special_filter'] = special_filter

    # Adding sorting by subject
    if 'subject' in all_features:
        all_features['subject'] = pd.to_numeric(all_features['subject'])
        all_features = all_features.sort_values(by='subject')
        if csv:
            all_features.to_csv(all_features_path, index=False)

    print('Amount of BP-Pairs extracted: ', all_features.shape)
    print('Features Extracted.')
    print('-----', '\n')

    if csv:
        return all_features_path
    else:
        return all_features

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Return the mean absolute percentage error (in percent) of ``y_pred`` against ``y_true``.

    Both inputs are converted to numpy arrays, so they are compared by
    position. ``y_true`` must not contain zeros.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return np.mean(relative_errors) * 100

def drop_correlation(df, labels, threshold = 0.95, plotcorr = False):
    """Drop features that are highly correlated with an earlier feature.

    The absolute pairwise correlation of all numeric columns not listed in
    ``labels`` is computed. A column is dropped when its correlation with any
    column to its left exceeds ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    labels : list of str
        Columns excluded from the correlation analysis (they are always kept).
    threshold : float
        Absolute correlation above which a column is dropped.
    plotcorr : bool
        If True, draw a heat map of the correlation matrix.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` without the dropped columns.
    """
    candidates = df.loc[:, ~df.columns.isin(labels)]
    # Only numeric columns can be correlated. Selecting them explicitly keeps
    # the function working with pandas >= 2.0, where corr() no longer skips
    # non-numeric columns silently.
    corr = candidates.select_dtypes(include=['number', 'bool']).corr()
    if plotcorr: 
        f, ax = plt.subplots(figsize=(15, 15))
        cmap = sns.diverging_palette(220, 10, as_cmap=True)
        sns.heatmap(corr, cmap = cmap,
                xticklabels=corr.columns.values,
                yticklabels=corr.columns.values)
    # Select upper triangle of correlation matrix
    upper = corr.abs().where(np.triu(np.ones(corr.shape), k=1).astype(bool))
    # Find features with correlation greater than threshold
    to_drop = [column for column in upper.columns if (upper[column] > threshold).any()]
    print("Dataframe Shape: " + str(df.shape))
    print("Columns dropped: ", len(to_drop))
    # Drop features 
    new_df = df.drop(columns = to_drop)
    print("New Dataframe Shape: " + str(new_df.shape))
    return new_df

def predict_bp_from_ppg(dataframe, predicted_variable = 'SBP', k = 1, correlation_threshold = 0.95, 
                        random_seed = 42, learning_rate = 0.01, n_estimators = 100, 
                        alpha = 1, l1_ratio = 0.5, random_state = 42, 
                        epochs = 50, batch_size = 5, n_jobs = -1, max_depth = 10, verbose = False):
    """Evaluate several regressors that predict blood pressure from PPG features.

    Subjects are held out fold by fold; in every fold an elastic net (LR),
    gradient boosting (GBM), LightGBM (LGBM), a random forest (RF) and a
    mean-predicting dummy regressor (DUMMY) are fitted on all other subjects
    and scored on the held-out ones with RMSE, MAPE and MAE.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature table with ``SYS(mmHg)``, ``DIA(mmHg)``, ``subject``,
        ``timestamp``, ``date``, ``time``, ``window_start`` and ``window_end``
        columns plus the features. It is not modified.
    predicted_variable : {'SBP', 'DBP'}
        Target; the other pressure column is removed from the inputs.
    k : int
        Number of subjects drawn for the test set of each fold.
    correlation_threshold : float
        Threshold passed to ``drop_correlation``.
    random_seed : int
        Seed for the fold selection and for GBM / LGBM.
    learning_rate, n_estimators : GBM / LGBM settings (``n_estimators`` also RF).
    alpha, l1_ratio : elastic-net settings.
    random_state : int
        Seed for the elastic net and the random forest.
    epochs, batch_size : not used for fitting; only recorded in the result.
    n_jobs : parallelism of LGBM and RF.
    max_depth : maximum depth of the random-forest trees.
    verbose : bool
        Print progress information.

    Returns
    -------
    dict or int
        ``0`` when no fold could be evaluated. Otherwise a dict holding general
        information (``subjects``, ``bp_pairs``, ``folders``,
        ``mean_train_size``, ``mean_test_size``), the mean and standard
        deviation of every metric per model
        (``<METRIC>_<MODEL>_MEAN`` / ``_STD``) and the parameters used.
    """
    df = dataframe.rename(columns={"SYS(mmHg)": "SBP", "DIA(mmHg)": "DBP", 'subject': 'patientid'})
    cols_dropped = ['timestamp', 'date', 'time', 'window_end', 'window_start']
    if verbose: print("Cols Dropped: ", cols_dropped)
    df = df.drop(columns=cols_dropped)

    # Remove measurements with a zero pressure reading, then correlated features
    df = df.drop(index=df.loc[(df['SBP'] == 0) | (df['DBP'] == 0)].index)
    df = drop_correlation(df, ['SBP', 'DBP'], correlation_threshold, plotcorr = False)
    if verbose: print(df.shape)

    # Everything except SBP, DBP and patientid is a model input
    input_shape = df.shape[1] - 3
    print('Nr of features: ', input_shape)
    patient_ids = np.unique(df['patientid'])

    # Models in evaluation order; the keys are part of the result names.
    models = {
        'LR': Pipeline([('standardize', StandardScaler()),
                        ('lr', ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=random_state))]),
        'GBM': Pipeline([('standardize', StandardScaler()),
                         ('gbm', GradientBoostingRegressor(learning_rate=learning_rate, n_estimators=n_estimators,
                                                           random_state=random_seed))]),
        'LGBM': Pipeline([('standardize', StandardScaler()),
                          ('lgbm', lgb.LGBMRegressor(learning_rate=learning_rate, n_estimators=n_estimators,
                                                     random_state=random_seed, n_jobs=n_jobs))]),
        'RF': Pipeline([('standardize', StandardScaler()),
                        ('rf', RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,
                                                     random_state=random_state, n_jobs=n_jobs))]),
        'DUMMY': DummyRegressor(strategy='mean'),
    }
    metric_names = ('RMSE', 'MAPE', 'MAE')
    scores = {model_name: {metric: [] for metric in metric_names} for model_name in models}

    i = 0
    mean_train = 0
    mean_test = 0
    total = len(df.index)
    subjects = len(df['patientid'].unique())

    if verbose: print("BPPairs: ", total)
    if verbose: print("Subjects: ", subjects)
    if verbose: print("\n")

    # NOTE(review): random.choices samples WITH replacement, so a fold may hold
    # fewer than k distinct subjects, and the loop stops while one subject is
    # left, which is therefore never tested. Kept as is so that results stay
    # comparable; confirm whether this is intended.
    while len(patient_ids) > 1:
        # Random Seed
        random.seed(random_seed)

        patient_test_ids = random.choices(patient_ids, k = k)
        patient_ids = [e for e in patient_ids if e not in patient_test_ids]
        df_test = df.loc[df['patientid'].isin(patient_test_ids)].dropna()
        df_train = df[~df['patientid'].isin(patient_test_ids)].dropna()
        if df_train.empty or df_test.empty:
            # The estimators cannot be fitted or scored on zero rows
            print("Skipping fold without usable rows. Test subjects: ", patient_test_ids)
            continue

        i = i + 1
        if verbose: print("Running fold" + str(i))
        if verbose: print("Train: ", df_train.shape)
        mean_train += len(df_train.index)
        if verbose: print("Test: ", df_test.shape)
        if verbose: print("Total: ", len(df_test.index) + len(df_train.index))
        mean_test += len(df_test.index)
        if verbose: print("\n")

        cols_dropped = ['patientid']
        if predicted_variable == 'SBP':
            cols_dropped.append('DBP')
        elif predicted_variable == 'DBP':
            cols_dropped.append('SBP')
        df_train = df_train.drop(columns = cols_dropped)
        df_test = df_test.drop(columns = cols_dropped)

        X_train = df_train.loc[:, df_train.columns != predicted_variable].values
        y_train = df_train[predicted_variable].values
        X_test = df_test.loc[:, df_test.columns != predicted_variable].values
        y_test = df_test[predicted_variable]

        for model_name, model in models.items():
            model.fit(X = X_train, y = y_train)
            predicted_labels = model.predict(X_test)
            scores[model_name]['RMSE'].append(np.sqrt(mean_squared_error(y_test, predicted_labels)))
            scores[model_name]['MAPE'].append(mean_absolute_percentage_error(y_test, predicted_labels))
            scores[model_name]['MAE'].append(mean_absolute_error(y_test, predicted_labels))

    if i == 0:
        print('No folds.')
        return 0

    # General Info
    results = {
        'subjects': subjects,
        'bp_pairs': total,
        'folders': i,
        'mean_train_size': round(mean_train/i),
        'mean_test_size': round(mean_test/i),
    }

    # Mean and standard deviation over the folds, per model and metric
    for model_name in models:
        for stat_name, stat in (('MEAN', np.mean), ('STD', np.std)):
            for metric in metric_names:
                key = '{}_{}_{}'.format(metric, model_name, stat_name)
                results[key] = stat(np.array(scores[model_name][metric]))

    parameters = {
        'predicted_variable' : predicted_variable,
        'correlation_threshold' : correlation_threshold,
        'random_seed' :  random_seed,
        'learning_rate' : learning_rate, 
        'n_estimators' : n_estimators, 
        'alpha' : alpha, 
        'l1_ratio' : l1_ratio,
        'random_state' : random_state, 
        'k' : k, 
        'features' : input_shape, 
        'epochs' : epochs, 
        'batch_size' : batch_size,
        'max_depth' : max_depth,
        'n_jobs' : n_jobs,
    }    
    results.update(parameters)

    if verbose: print("\n")
    if verbose: print("Number of folds: ", results['folders'])
    if verbose: print("Mean train size: ", results['mean_train_size'])
    if verbose: print("Mean test size: ", results['mean_test_size'])
    return results

In [None]:
# Settings shared by every feature-extraction run
feature_parameters = dict(
    csv=True,
    bp_monitor='spacelabs',
    timeshift=2,
    verbose=False,
)

# Values that are combined with each other by the experiment grid
time_deltas = ['%d seconds' % seconds for seconds in (15, 30, 45)]
time_delta_types = ['bffill', 'bfill']
experiment_types = ['biking', '24 Hours']
motion_filters = [True, False]
special_filters = ['norm', 'butter', 'cheby']

# Multiple Experiments

In [None]:
# Run the feature extraction for every combination of the grid values and
# remember the path of each combined feature file.
all_paths = []
for time_delta_type in time_delta_types:
    for time_delta in time_deltas:
        for experiment_type in experiment_types:
            for motion_filter in motion_filters:
                for special_filter in special_filters:
                    feature_parameters_variable = dict(
                        time_delta=time_delta,
                        time_delta_type=time_delta_type,
                        experiment_type=experiment_type,
                        motion_filter=motion_filter,
                        special_filter=special_filter,
                    )
                    feature_parameters.update(feature_parameters_variable)

                    path = extract_features(
                        csv=feature_parameters['csv'],
                        bp_monitor=feature_parameters['bp_monitor'],
                        timeshift=feature_parameters['timeshift'],
                        time_delta=feature_parameters['time_delta'],
                        time_delta_type=feature_parameters['time_delta_type'],
                        experiment_type=feature_parameters['experiment_type'],
                        motion_filter=feature_parameters['motion_filter'],
                        special_filter=feature_parameters['special_filter'],
                        verbose=feature_parameters['verbose'],
                    )
                    all_paths.append(path)

In [None]:
# Number of feature-file paths collected in all_paths.
print(len(all_paths))

# Read Paths

In [None]:
# Rebuild the paths of today's combined feature files without running the extraction.
all_paths = []
base = 'hype'
date = datetime.datetime.today().strftime('%Y-%m-%d')

for time_delta_type in time_delta_types:
    for time_delta in time_deltas:
        for experiment_type in experiment_types:
            for special_filter in special_filters:
                for motion_filter in motion_filters:
                    motion_path_name = 'motion-filtered' if motion_filter else 'motion-not-filtered'
                    experiment_name = experiment_type.replace(' ','')
                    run_name = time_delta.replace(' ','')+'-'+time_delta_type+'-'+str(special_filter).lower()

                    features_path = os.path.join('..', '..', 'features', base, date, experiment_name, run_name)
                    motion_path = os.path.join(features_path, motion_path_name)
                    path = os.path.join(motion_path, 'all_features_{}_{}.csv'.format(experiment_name, run_name))
                    all_paths.append(path)
print(len(all_paths))

# Experiments

In [None]:
# Evaluate the regressors on every feature file, for both pressure values and
# for test folds of 1, 2 and 3 subjects.
experiments = []
predicted_variables = ['SBP', 'DBP']
ks = [1, 2, 3]

for path in all_paths:
    print("Predicting for: ", path)
    # extract_features returns the path even if no file was written
    if not os.path.exists(path):
        print("Feature file not found: ", path)
        continue
    df = pd.read_csv(path)
    print(df.shape)
    if df.empty:
        print("Dataframe was empty: ", path)
        continue

    correlation_threshold = 0.75
    if df['experiment_type'].unique()[0] == 'biking':
        correlation_threshold = 0.6

    # The extraction settings are constant within one file; take them from the
    # first value and remove the columns from the model input.
    features = {
                'time_delta' : df['time_delta'].unique()[0], 
                'time_delta_type' : df['time_delta_type'].unique()[0],
                'experiment_type' : df['experiment_type'].unique()[0], 
                'motion_filter' : df['motion_filter'].unique()[0],
                'bp_monitor' : df['bp_monitor'].unique()[0], 
                'timeshift' : df['timeshift'].unique()[0],
                'special_filter' : df['special_filter'].unique()[0]
                }
    df = df.drop(columns=list(features))

    for variable in predicted_variables:
        for k in ks:
            results = predict_bp_from_ppg(df, predicted_variable = variable, 
                                          k = k, correlation_threshold = correlation_threshold
                                         )
            # predict_bp_from_ppg returns 0 when no fold could be evaluated
            if results != 0:
                results.update(features)
                experiments.append(results)

all_experiments = pd.DataFrame(experiments)

# Saving the Results

In [None]:
# Replace the boolean motion-filter flag with readable labels, then preview the result.
motion_filter_labels = {True: 'yes', False: 'no'}
all_experiments = all_experiments.replace({'motion_filter': motion_filter_labels})
all_experiments.head(2)

In [None]:
# Today's date (YYYY-MM-DD) is used as the prefix of the results file name.
date = datetime.datetime.today().strftime('%Y-%m-%d')
# Write all experiment results to CSV; index=True also writes the row index as a column,
# and mode='w' overwrites any file already saved for the same date.
all_experiments.to_csv('../../results/'+date+'_results_hype.csv', index=True, mode='w')

## Reading Experiments From the Results File and Exploring the Data

In [None]:
# Today's date (YYYY-MM-DD); used below to locate the results file.
date = datetime.datetime.today().strftime('%Y-%m-%d')

In [None]:
# Path of the results file saved for `date`.
results_path = '../../results/'+date+'_results_hype.csv'
# NOTE(review): the file is written with index=True, so this read presumably adds an
# unnamed index column to the frame — confirm, or pass index_col=0.
all_experiments = pd.read_csv(results_path)

In [None]:
# Boxplot of the MAE_GBM_MEAN column, one box per predicted_variable value.
all_experiments.boxplot(column=['MAE_GBM_MEAN'], by='predicted_variable', figsize=(15,8))

In [None]:
# Boxplot of the MAE_DUMMY_MEAN column, one box per predicted_variable value.
all_experiments.boxplot(column=['MAE_DUMMY_MEAN'], by='predicted_variable', figsize=(15,8))

In [None]:
# Boxplot of the MAE_LGBM_MEAN column, one box per predicted_variable value.
all_experiments.boxplot(column=['MAE_LGBM_MEAN'], by='predicted_variable', figsize=(15,8))

In [None]:
# Boxplot of the MAE_LR_MEAN column, one box per predicted_variable value.
all_experiments.boxplot(column=['MAE_LR_MEAN'], by='predicted_variable', figsize=(15,8))

In [None]:
# Boxplot of the MAE_RF_MEAN column, one box per predicted_variable value.
all_experiments.boxplot(column=['MAE_RF_MEAN'], by='predicted_variable', figsize=(15,8))

In [None]:
def _model_view(model_tag):
    """Return the columns for one model from `all_experiments`, sorted by MAE.

    `model_tag` is the upper-case model label embedded in the metric column
    names (e.g. 'GBM' for 'MAE_GBM_MEAN'). Rows are ordered by predicted
    variable, experiment type, k and finally the model's mean MAE.
    """
    target_columns = ['predicted_variable', 'experiment_type']
    metric_columns = ['MAE_{}_MEAN'.format(model_tag), 'MAE_{}_STD'.format(model_tag),
                      'MAPE_{}_MEAN'.format(model_tag), 'RMSE_{}_MEAN'.format(model_tag)]
    setting_columns = ['special_filter', 'time_delta', 'time_delta_type', 'motion_filter', 'k']
    view = all_experiments[target_columns + metric_columns + setting_columns]
    return view.sort_values(by=target_columns + ['k', metric_columns[0]])


# One sorted view per model.
gbm = _model_view('GBM')
lr = _model_view('LR')
lgbm = _model_view('LGBM')
rf = _model_view('RF')
d = _model_view('DUMMY')

## Best Results

In [None]:
def _best_row_per_model(experiment_type, predicted_variable, k=1):
    """Combine the lowest-MAE row of every model for one experiment/target pair.

    For each per-model frame (`gbm`, `lgbm`, `rf`, `lr`, `d`) the rows matching
    `experiment_type`, `predicted_variable` and `k` are selected and the first
    one is kept. This relies on those frames already being sorted by ascending
    mean MAE within each (predicted_variable, experiment_type, k) group, so the
    first row is the best configuration for that model.

    Returns a frame indexed by (predicted_variable, experiment_type, k) with the
    MAE mean and standard deviation columns of all five models side by side.
    """
    index_keys = ['predicted_variable', 'experiment_type', 'k']
    model_frames = [('GBM', gbm), ('LGBM', lgbm), ('RF', rf), ('LR', lr), ('DUMMY', d)]

    combined = None
    for model_tag, frame in model_frames:
        matches = frame.loc[(frame['experiment_type'] == experiment_type)
                            & (frame['predicted_variable'] == predicted_variable)
                            & (frame['k'] == k)]
        metric_columns = ['MAE_{}_MEAN'.format(model_tag), 'MAE_{}_STD'.format(model_tag)]
        best = matches.head(1)[index_keys + metric_columns].set_index(index_keys)
        # The first model seeds the table; later models are joined on the shared index levels.
        combined = best if combined is None else combined.join(best, on=index_keys)
    return combined


# Best configuration per model for each experiment type / target combination (k == 1).
min_24_dbp = _best_row_per_model('24 Hours', 'DBP')
min_24_sbp = _best_row_per_model('24 Hours', 'SBP')
min_biking_dbp = _best_row_per_model('biking', 'DBP')
min_biking_sbp = _best_row_per_model('biking', 'SBP')

best_results = pd.concat([min_biking_dbp, min_24_dbp, min_biking_sbp, min_24_sbp], axis=0)
best_results

In [None]:
# Today's date (YYYY-MM-DD) is used as the prefix of the output file name.
date = datetime.datetime.today().strftime('%Y-%m-%d')
# Write the best-results table to CSV; index=True keeps its index in the file.
best_results.to_csv('../../results/'+date+'_best_results_hype.csv', index=True, mode='w')

## Group By Experiments

In [None]:
# numeric_only=True: average only the numeric columns; the remaining string columns
# cannot be averaged, and pandas >= 2.0 raises instead of dropping them silently.
lr.groupby(['predicted_variable','experiment_type', 'k']).mean(numeric_only=True)

In [None]:
# numeric_only=True: average only the numeric columns; the remaining string columns
# cannot be averaged, and pandas >= 2.0 raises instead of dropping them silently.
rf.groupby(['predicted_variable','experiment_type', 'k']).mean(numeric_only=True)

In [None]:
# numeric_only=True: average only the numeric columns; the remaining string columns
# cannot be averaged, and pandas >= 2.0 raises instead of dropping them silently.
gbm.groupby(['predicted_variable','experiment_type', 'k']).mean(numeric_only=True)

In [None]:
# numeric_only=True: average only the numeric columns; the remaining string columns
# cannot be averaged, and pandas >= 2.0 raises instead of dropping them silently.
lgbm.groupby(['predicted_variable','experiment_type', 'k']).mean(numeric_only=True)

In [None]:
# numeric_only=True: average only the numeric columns; the remaining string columns
# cannot be averaged, and pandas >= 2.0 raises instead of dropping them silently.
d.groupby(['predicted_variable','experiment_type', 'k']).mean(numeric_only=True)

In [None]:
# Two parameter combinations with the lowest average GBM error.
# numeric_only=True skips the string columns, which pandas >= 2.0 refuses to average.
(
    gbm.groupby(['motion_filter', 'special_filter', 'time_delta', 'time_delta_type', 'k'])
    .mean(numeric_only=True)
    .sort_values(by=['MAE_GBM_MEAN', 'MAE_GBM_STD', 'MAPE_GBM_MEAN'])
    .head(2)
)

In [None]:
# Two parameter combinations with the lowest average LGBM error.
# numeric_only=True skips the string columns, which pandas >= 2.0 refuses to average.
(
    lgbm.groupby(['motion_filter', 'special_filter', 'time_delta', 'time_delta_type', 'k'])
    .mean(numeric_only=True)
    .sort_values(by=['MAE_LGBM_MEAN', 'MAE_LGBM_STD', 'MAPE_LGBM_MEAN'])
    .head(2)
)

In [None]:
# Two parameter combinations with the lowest average RF error.
# numeric_only=True skips the string columns, which pandas >= 2.0 refuses to average.
(
    rf.groupby(['motion_filter', 'special_filter', 'time_delta', 'time_delta_type', 'k'])
    .mean(numeric_only=True)
    .sort_values(by=['MAE_RF_MEAN', 'MAE_RF_STD', 'MAPE_RF_MEAN'])
    .head(2)
)

In [None]:
# Two parameter combinations with the lowest average DUMMY error.
# numeric_only=True skips the string columns, which pandas >= 2.0 refuses to average.
(
    d.groupby(['motion_filter', 'special_filter', 'time_delta', 'time_delta_type', 'k'])
    .mean(numeric_only=True)
    .sort_values(by=['MAE_DUMMY_MEAN', 'MAE_DUMMY_STD', 'MAPE_DUMMY_MEAN'])
    .head(2)
)

In [None]:
# Two parameter combinations with the lowest average LR error.
# numeric_only=True skips the string columns, which pandas >= 2.0 refuses to average.
(
    lr.groupby(['motion_filter', 'special_filter', 'time_delta', 'time_delta_type', 'k'])
    .mean(numeric_only=True)
    .sort_values(by=['MAE_LR_MEAN', 'MAE_LR_STD', 'MAPE_LR_MEAN'])
    .head(2)
)

# Boxplots

In [None]:
# Load the project configuration; its 'figures' entry is the root folder for saved plots.
with open('../../config.json') as f:
    config = json.load(f)

# Name the figure folder after the results file (base name without its extension).
results = os.path.splitext(os.path.basename(results_path))[0]
figure_path = os.path.join(config['figures'], results)

# exist_ok=True avoids the check-then-create race and keeps the cell safe to re-run.
os.makedirs(figure_path, exist_ok=True)

In [None]:
# LGBM rows for the biking experiments at k == 1, with plot-friendly column labels.
biking_rows = (lgbm['experiment_type'] == 'biking') & (lgbm['k'] == 1)
biking_labels = {
    "MAE_LGBM_MEAN": "MAE (LGBM) - Stress Test",
    "time_delta": "Time Window",
    "time_delta_type": "Time Window Type",
    "special_filter": "Filters",
    "motion_filter": "Motion Removal",
}
lgbm_k_1_biking = lgbm.loc[biking_rows].rename(columns=biking_labels)

# Error distribution per time window, saved next to the other figures.
boxplot = lgbm_k_1_biking.boxplot(
    column=['MAE (LGBM) - Stress Test'],
    by='Time Window',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, 'biking-time-window.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# LGBM rows for the 24 Hours experiments at k == 1, with plot-friendly column labels.
hours24_rows = (lgbm['experiment_type'] == '24 Hours') & (lgbm['k'] == 1)
hours24_labels = {
    "MAE_LGBM_MEAN": "MAE (LGBM) - 24 Hours",
    "time_delta": "Time Window",
    "time_delta_type": "Time Window Type",
    "special_filter": "Filters",
    "motion_filter": "Motion Removal",
}
lgbm_k_1_24 = lgbm.loc[hours24_rows].rename(columns=hours24_labels)

# Error distribution per time window, saved next to the other figures.
boxplot = lgbm_k_1_24.boxplot(
    column=['MAE (LGBM) - 24 Hours'],
    by='Time Window',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, '24-time-window.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# Stress-test error grouped by the "Motion Removal" setting.
boxplot = lgbm_k_1_biking.boxplot(
    column=['MAE (LGBM) - Stress Test'],
    by='Motion Removal',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, 'biking-motion.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# 24-hour error grouped by the "Motion Removal" setting.
boxplot = lgbm_k_1_24.boxplot(
    column=['MAE (LGBM) - 24 Hours'],
    by='Motion Removal',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, '24-motion.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# Stress-test error grouped by the "Filters" setting.
boxplot = lgbm_k_1_biking.boxplot(
    column=['MAE (LGBM) - Stress Test'],
    by='Filters',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, 'biking-filters.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# 24-hour error grouped by the "Filters" setting.
boxplot = lgbm_k_1_24.boxplot(
    column=['MAE (LGBM) - 24 Hours'],
    by='Filters',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, '24-filters.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# Stress-test error grouped by the "Time Window Type" setting.
boxplot = lgbm_k_1_biking.boxplot(
    column=['MAE (LGBM) - Stress Test'],
    by='Time Window Type',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, 'biking-window-type.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

In [None]:
# 24-hour error grouped by the "Time Window Type" setting.
boxplot = lgbm_k_1_24.boxplot(
    column=['MAE (LGBM) - 24 Hours'],
    by='Time Window Type',
    figsize=(15, 8),
)
output_file = os.path.join(figure_path, '24-window-type.png')
boxplot.figure.savefig(output_file, format='png', dpi=300)

# Single Experiment

In [None]:
# Configuration of the single experiment whose features are extracted below.
feature_parameters = {
    'csv': False,
    'bp_monitor': 'spacelabs',
    'timeshift': 2,
    'time_delta': '15 seconds',
    'time_delta_type': 'bfill',
    'experiment_type': 'biking',
    'motion_filter': False,
    'special_filter': 'cheby',
    'verbose': False,
}

# extract_features takes these settings positionally, in this order; verbose goes by keyword.
positional_settings = ['csv', 'bp_monitor', 'timeshift', 'time_delta', 'time_delta_type',
                       'experiment_type', 'motion_filter', 'special_filter']
df = extract_features(*[feature_parameters[name] for name in positional_settings],
                      verbose=feature_parameters['verbose'])

In [None]:
# Work on a copy so the extracted feature frame `df` stays untouched.
df_experiment = df.copy()

In [None]:
# Preview the first rows of the extracted features.
df_experiment.head()

In [None]:
# Keep only the model inputs: the configuration columns are constant metadata.
metadata_columns = ['bp_monitor', 'timeshift', 'time_delta', 'time_delta_type',
                    'experiment_type', 'motion_filter', 'special_filter']
df_experiment = df.drop(columns=metadata_columns)

results = predict_bp_from_ppg(df_experiment, predicted_variable='SBP', k=1)
if results != 0:
    # Attach the configuration so the result row is self-describing.
    results.update(feature_parameters)
    # Build the one-row frame directly; DataFrame.append was removed in pandas 2.0.
    experiments = pd.DataFrame([results])
else:
    # The batch experiment loop treats a return value of 0 as "no result";
    # handle it the same way here instead of failing on `.update`.
    print("No prediction results for SBP with the selected configuration")
    experiments = pd.DataFrame()
experiments

In [None]:
# Keep only the model inputs: the configuration columns are constant metadata.
metadata_columns = ['bp_monitor', 'timeshift', 'time_delta', 'time_delta_type',
                    'experiment_type', 'motion_filter', 'special_filter']
df_experiment = df.drop(columns=metadata_columns)

results = predict_bp_from_ppg(df_experiment, predicted_variable='DBP', k=1)
if results != 0:
    # Attach the configuration so the result row is self-describing.
    results.update(feature_parameters)
    # Build the one-row frame directly; DataFrame.append was removed in pandas 2.0.
    experiments = pd.DataFrame([results])
else:
    # The batch experiment loop treats a return value of 0 as "no result";
    # handle it the same way here instead of failing on `.update`.
    print("No prediction results for DBP with the selected configuration")
    experiments = pd.DataFrame()
experiments