### Processed Dataset to be used as Input
Morassi Sasso, Ariane (2020): Processed EVAL Dataset (30 secs window - min_max normalized - bfill). figshare. Dataset. https://doi.org/10.6084/m9.figshare.12649691
`BFILL` means that the signal window was taken from the period before the start of the measurement.

In [None]:
import os
import math
import json
import time
import random
import datetime
import numpy as np
import pandas as pd

# Graphs
import seaborn as sns
import matplotlib.pyplot as plt

# Custom Packages
import devicely

# Signal Processing
import scipy.stats as stats
import scipy.signal as sig

# Sklearn
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import Pipeline

# Custom Models
import lightgbm as lgb

In [None]:
def find_cycle_starts(df, sample_rate=1000):
    """Return the positions of the local minima used as cardiac-cycle starts.

    The values are negated so that ``scipy.signal.find_peaks`` locates minima
    instead of maxima. Two detected minima are at least ``0.7 * sample_rate``
    samples apart.

    Parameters
    ----------
    df : object exposing ``.values`` (e.g. a pandas Series)
        The signal to search.
    sample_rate : int
        Samples per second; only used to scale the minimum distance.

    Returns
    -------
    numpy.ndarray
        Positional indices of the detected minima.
    """
    min_gap = 0.7 * sample_rate
    inverted = -df.values
    cycle_start_positions, _ = sig.find_peaks(inverted, distance=min_gap)
    return cycle_start_positions

def find_xs_for_y(ys, y_val, sys_peak):
    """Return the index labels of the samples closest to ``y_val`` on both sides of the peak.

    Parameters
    ----------
    ys : pandas.Series
        The signal.
    y_val : float
        Target value to look for.
    sys_peak : int
        Position at which the signal is split; the sample at this position
        belongs to the second half.

    Returns
    -------
    tuple
        ``(x1, x2)``: label of the closest sample before ``sys_peak`` and label
        of the closest sample from ``sys_peak`` onwards.
    """
    distance_to_target = (ys - y_val).abs()
    before_peak = distance_to_target[:sys_peak]
    from_peak_on = distance_to_target[sys_peak:]
    return before_peak.idxmin(), from_peak_on.idxmin()

def clean_window_features_of_outliers(df):
    """Drop rows whose feature values look like outliers.

    For every column whose name does not contain ``'ts'``, only rows with a
    value strictly below twice the column's 80 % quantile are kept. The
    quantiles are computed once, on the unfiltered frame. Rows holding NaN in
    a checked column are dropped as well, because the comparison is False.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-cycle features.

    Returns
    -------
    pandas.DataFrame
        The filtered frame.
    """
    reference = df.quantile(0.8)
    for name in df.columns:
        if 'ts' in name:
            # Timestamp columns are not checked
            continue
        limit = reference[name] * 2
        df = df[df[name] < limit]
    return df

def find_clean_cycles_with_template(signal, verbose=False):
    """Detect cardiac cycles in ``signal`` and keep those that match an averaged template.

    Steps:
      1. Cycle starts are detected with ``find_cycle_starts``.
      2. The template length is the floored median distance between starts and
         the template is the point-wise mean over all cycles.
      3. If more than half of the cycles (and more than one) correlate with the
         template by at least 0.8, the template is rebuilt from those cycles only.
      4. Every remaining cycle must reach a Pearson correlation of at least 0.8
         with the template, both truncated to the template length (SQI1) and
         re-sampled to the template length (SQI2).

    Parameters
    ----------
    signal : pandas.Series
        The (filtered) signal. NOTE(review): the code mixes label-based lookups
        (``signal[cycle_starts + i]``) with positional slices, so it presumably
        expects a default 0..n-1 integer index - confirm against the caller.
    verbose : bool
        When True, plots the template and the valid cycles and saves the latter
        as an SVG below ``config['figures']/<today>/valid_cycles_eval``, where
        ``config`` is read from ``../../config.json``.

    Returns
    -------
    list
        ``(cycle_start, cycle_end)`` tuples; an empty list when at most one
        cycle start was detected.
    """
    initial_cycle_starts = find_cycle_starts(signal)
    if len(initial_cycle_starts) <= 1:
        return []
    template_length = math.floor(np.median(np.diff(initial_cycle_starts)))
    # The last detected start has no following start to close the cycle, so it is dropped
    cycle_starts = initial_cycle_starts[:-1]
    # Drop trailing cycles for which a full template length would run past the signal
    while cycle_starts[-1] + template_length > len(signal):
        cycle_starts = cycle_starts[:-1]
    # Template: mean over all cycles of the i-th sample after each cycle start
    template = []
    for i in range(template_length):
        template.append(np.mean(signal[cycle_starts + i]))
    
    # First pass: correlation of every cycle with the initial template
    corr_coef = []
    for cycle_start in cycle_starts:
        corr_coef.append(np.corrcoef(template, signal[cycle_start:cycle_start+template_length])[0,1])

    valid_indices = np.argwhere(np.array(corr_coef) >= 0.8)
    # Only refine the template when the well-correlated cycles are the majority
    if (len(valid_indices) > len(cycle_starts) / 2) and len(valid_indices) > 1:
        cycle_starts = cycle_starts[np.squeeze(valid_indices)]
        template2 = []
        for i in range(template_length):
            template2.append(np.mean(signal[cycle_starts + i]))
        template = template2
        
    if verbose:
        print('Cycle Template')
        plot = plt.plot(template)
        plt.show()
        
    # Check correlation of cycles with template
    # SQI1: Pearson Correlation
    sqi1_corr = []
    for cycle_start in cycle_starts:
        corr, _ = stats.pearsonr(template, signal[cycle_start:cycle_start+template_length])
        sqi1_corr.append(corr)
        
    # SQI2: Pearson Correlation between the cycle, re-sampled to match the template length, 
    # and the template itself
    sqi2_corr = []
    for cycle_start in cycle_starts:
        # The cycle ends where the next detected cycle starts
        cycle_end = initial_cycle_starts[np.squeeze(np.argwhere(initial_cycle_starts==cycle_start)) + 1] 
        corr, _ = stats.pearsonr(template, sig.resample(signal[cycle_start:cycle_end], template_length))
        sqi2_corr.append(corr)
        
    # Filter for correlation >= 0.8
    # A cycle is kept only if both SQI1 and SQI2 reach the threshold
    corrs = np.array([sqi1_corr, sqi2_corr]).transpose()
    cycle_starts = cycle_starts[np.all(corrs >= 0.8, axis=1)]
    
    if verbose:
        print('Detected Valid Cycles')
        fig = plt.figure(figsize=(12, 10), dpi=300)
        #plt.xlabel('Samples', fontsize=24)
        #plt.ylabel('Normalized Magnitude', fontsize=24)
        plt.axis('off')
        for cycle_start in cycle_starts:
            plt.rcParams.update({'font.size': 16})
            plt.plot(signal[cycle_start:cycle_start+template_length].to_numpy())
        
        # Save Valid Cycles
        with open('../../config.json') as f:
            config = json.load(f)
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        figure_path = config['figures']
    
        # Millisecond timestamp used as the file-name prefix of the saved figure
        millis = int(round(time.time() * 1000))
        valid_cycles = os.path.join(figure_path, today, 'valid_cycles_eval')
        
        if not os.path.exists(valid_cycles):
            os.makedirs(valid_cycles)
        fig.savefig(os.path.join(valid_cycles, str(millis)+'_valid_cycles_eval.svg'), 
                    transparent=True, format='svg')
        
    cycles = []
    for cycle_start in cycle_starts:
        cycle_end = initial_cycle_starts[np.squeeze(np.argwhere(initial_cycle_starts==cycle_start)) + 1]
        # Cycles more than 20 % longer than the template are cut to the template length
        if (cycle_end - cycle_start) > template_length*1.2:
            cycle_end = cycle_start + template_length
        cycles.append((cycle_start, cycle_end))

    return cycles
    
def extract_features_for_cycle(window_df, signal, verbose=False):
    """Compute the features of one cardiac cycle and add them as a new row.

    The cycle's index is interpreted as milliseconds and converted to
    timestamps, gaps are interpolated over time, and the cycle is shifted so
    that its minimum is zero. The first local maximum is taken as the systolic
    peak.

    Features written to the new row:
      * ``start_ts`` / ``sys_peak_ts``: timestamps of cycle start and peak
      * ``T_S`` / ``T_D``: seconds from start to peak and from peak to end
      * for p in 10, 25, 33, 50, 66, 75 (percent of the maximum amplitude):
        ``DW_p``, ``DW_SW_sum_p`` and ``DW_SW_ratio_p``, derived from the two
        samples closest to that amplitude before and after the peak

    Parameters
    ----------
    window_df : pandas.DataFrame
        Features of the cycles processed so far (may be empty). The new row
        gets the label ``window_df.index.max() + 1``, or 0 for an empty frame.
    signal : pandas.Series
        Samples of one cycle. The caller's object is not modified.
    verbose : bool
        When True, plots the cycle, its peaks and the matched samples.

    Returns
    -------
    pandas.DataFrame
        ``window_df`` extended by one row.

    Raises
    ------
    IndexError
        If the cycle contains no local maximum.
    """
    cur_index = window_df.index.max() + 1
    if np.isnan(cur_index):
        cur_index = 0
    # Work on a copy so that the caller's Series keeps its original index
    signal = signal.copy()
    signal.index = pd.to_datetime(signal.index, unit='ms')
    signal = signal.interpolate(method='time')
    signal = signal - signal.min()
    max_amplitude = signal.max()

    peaks = sig.find_peaks(signal.values)[0]
    if len(peaks) == 0:
        raise IndexError('no local maximum found in cycle, cannot locate the systolic peak')
    sys_peak_pos = peaks[0]
    sys_peak_ts = signal.index[sys_peak_pos]
    cycle_start_ts = signal.index.min()
    cycle_end_ts = signal.index.max()

    if verbose:
        plt.figure()
        plt.xlim((cycle_start_ts, cycle_end_ts))
        # ``peaks`` holds positions, hence the explicit positional lookup
        plt.scatter(signal.index[peaks], signal.iloc[peaks])
        plt.plot(signal.index, signal.values)
    # Features
    new_row = pd.DataFrame({'start_ts': cycle_start_ts,
                            'sys_peak_ts': sys_peak_ts,
                            'T_S': (sys_peak_ts - cycle_start_ts).total_seconds(),
                            'T_D': (cycle_end_ts - sys_peak_ts).total_seconds()
                           }, index=[cur_index])
    if window_df.empty and len(window_df.columns) == 0:
        # Nothing to concatenate with yet
        window_df = new_row
    else:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
        window_df = pd.concat([window_df, new_row], sort=False)
    for p in [10, 25, 33, 50, 66, 75]:
        p_ampl = p / 100 * max_amplitude
        x1, x2 = find_xs_for_y(signal, p_ampl, sys_peak_pos)
        if verbose:
            plt.scatter([x1, x2], signal.loc[[x1, x2]])
        window_df.loc[cur_index, 'DW_'+str(p)] = (x2 - sys_peak_ts).total_seconds()
        window_df.loc[cur_index, 'DW_SW_sum_'+str(p)] = (x2 - x1).total_seconds()
        window_df.loc[cur_index, 'DW_SW_ratio_'+str(p)] = (x2 - sys_peak_ts) / (sys_peak_ts - x1)
    if verbose:
        plt.show()
    return window_df
    
def extract_features_for_window(df, verbose=False):
    """Compute per-cycle features for every clean cycle of one signal window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'bvp_filtered'``.
    verbose : bool
        Forwarded to the helpers; additionally prints the feature table.

    Returns
    -------
    pandas.DataFrame
        One row per cycle after outlier removal, or an empty frame when no
        clean cycle was found. ``CP`` holds the seconds between the systolic
        peak of a cycle and the one of the following cycle.
    """
    cycles = find_clean_cycles_with_template(df['bvp_filtered'], verbose=verbose)
    if not cycles:
        return pd.DataFrame()

    window_features = pd.DataFrame()
    for row_label, (first, last) in enumerate(cycles):
        window_features = extract_features_for_cycle(
            window_features, df['bvp_filtered'].iloc[first:last], verbose=verbose)
        if row_label == 0:
            continue
        # The peak-to-peak time is stored on the previous cycle's row
        previous_label = row_label - 1
        peak_gap = (window_features.loc[row_label, 'sys_peak_ts']
                    - window_features.loc[previous_label, 'sys_peak_ts'])
        window_features.loc[previous_label, 'CP'] = peak_gap.total_seconds()
    if verbose:
        print('Cycle Features within Window:')
        print(window_features)
    return clean_window_features_of_outliers(window_features)

def apply_filter(df, filter_type='cheby', fs=1000):
    """Add a band-pass filtered copy of ``df['bvp']`` as column ``'bvp_filtered'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'bvp'``. The frame is modified in place.
    filter_type : str
        ``'cheby'`` (4th-order Chebyshev type II, 20 dB stop-band attenuation)
        or ``'butter'`` (4th-order Butterworth); both use a 0.5-8 Hz pass band
        and are applied forwards and backwards with ``sosfiltfilt``.
    fs : int
        Sampling frequency in Hz.

    Returns
    -------
    pandas.DataFrame
        ``df`` including the new column. Signals of at most 27 samples are
        copied unfiltered.

    Raises
    ------
    ValueError
        If ``filter_type`` is unknown and the signal is long enough to filter.
    """
    # The eval dataset was already normalized
    # NOTE(review): 27 presumably mirrors the default padding length that
    # sosfiltfilt needs for these 4-section filters - confirm.
    if len(df['bvp']) <= 27:
        df['bvp_filtered'] = df['bvp']
        # Return the frame, like every other branch, so callers can keep
        # using df['bvp_filtered'] on the result
        return df
    if filter_type == 'cheby':
        sos = sig.cheby2(4, 20, [0.5, 8], btype='bandpass', fs=fs, output='sos')
    elif filter_type == 'butter':
        sos = sig.butter(4, [0.5, 8], btype='bandpass', fs=fs, output='sos')
    else:
        # Previously the frame came back without 'bvp_filtered' and the
        # caller failed later with an unrelated KeyError
        raise ValueError("Unknown filter_type {!r}; expected 'cheby' or 'butter'".format(filter_type))
    df['bvp_filtered'] = sig.sosfiltfilt(sos, df['bvp'])
    return df

def extract_features_for_signal(signal, time, filter_type, verbose=False):
    """Filter every PPG snippet, extract window features and export the filtered snippets.

    For each row of ``signal`` the first ``time`` samples of ``row['ppg']`` are
    inverted and filtered, per-cycle features are extracted, and the mean and
    variance of every non-timestamp feature are written to ``signal`` as
    ``<feature>_mean`` / ``<feature>_var``. Rows containing any NaN are dropped
    afterwards.

    Side effects: ``signal`` is modified in place, and the filtered snippets
    are written to
    ``../../datasets/eval/eval_ppg_snippets-<seconds>sec-<filter_type>.json``.

    Parameters
    ----------
    signal : pandas.DataFrame
        One snippet per row; must contain the column ``'ppg'``.
    time : int
        Number of samples to use per snippet. Note that this parameter hides
        the ``time`` module inside the function.
    filter_type : str
        Forwarded to ``apply_filter``.
    verbose : bool
        When True, plots the raw and filtered signals and prints extra information.

    Returns
    -------
    pandas.DataFrame
        ``signal`` with the feature columns added.
    """
    # Path for the json (for the image representation)
    eval_dataset = os.path.join('..','..','datasets','eval')
    os.makedirs(eval_dataset, exist_ok=True)

    # Collected per row and turned into one frame after the loop:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call
    filtered_records = []
    record_labels = []

    for index, row in signal.iterrows():
        print("From:", index)
        df = pd.Series(row['ppg'][:time])*(-1)
        window_df = pd.DataFrame.from_dict(df).rename(columns={0: 'bvp'})

        if verbose:
            fig = plt.figure(figsize=(12, 10))
            plt.plot(df)
            plt.show()
            fig = plt.figure(figsize=(12, 10))
            plt.plot(window_df['bvp'])
            plt.show()

        window_df = apply_filter(window_df, filter_type=filter_type, fs=1000)

        # Generate new json (for the image representation)
        filtered_ppg = window_df['bvp_filtered'].tolist()
        record = row.to_dict()
        record['ppg'] = filtered_ppg
        filtered_records.append(record)
        record_labels.append(index)

        if verbose: print("PPG shape", len(filtered_ppg[:time]))
        if verbose:
            fig = plt.figure(figsize=(12, 10))
            plt.plot(df)
            plt.show()
            fig = plt.figure(figsize=(12, 10))
            plt.plot(window_df['bvp_filtered'])
            plt.show()
        window_features = extract_features_for_window(window_df, verbose)
        for col in window_features.columns:
            if col.find('ts') == -1:
                signal.loc[index, col+'_mean'] = window_features[col].mean()
                signal.loc[index, col+'_var'] = window_features[col].var()

    signal.dropna(inplace=True, how='any')

    new_rows = pd.DataFrame(filtered_records, index=record_labels)
    print(new_rows.shape)
    # Save the json (for the image representation)
    new_rows.rename(columns={'subject':'patientid', 'SYS(mmHg)':'sbp', 'DIA(mmHg)':'dbp'}, inplace = True)
    json_eval_final = json.loads(new_rows.to_json(orient='records'))
    file_name = os.path.join(eval_dataset, 'eval_ppg_snippets-'+str(round(time/1000))+'sec-'+filter_type+'.json')
    with open(file_name, 'w') as f:
        json.dump(json_eval_final, f)

    return signal

In [None]:
def extract_features(csv=True, time_delta='15 seconds', time_delta_type = 'bfill', 
                    experiment_type='eval', motion_filter=False, special_filter='cheby', verbose=False):
    """Extract the PPG features of the eval dataset and optionally store them as CSV.

    Reads ``../../config.json``, loads ``eval_ppg_snippets.json`` from
    ``config['eval']``, runs ``extract_features_for_signal`` and tags the
    result with the parameters of this run.

    Parameters
    ----------
    csv : bool
        When True, the features (without the ``ppg`` column) are written below
        ``../../features/eval/<today>/...``.
    time_delta : str
        Window length as ``'<number> <unit>'``; the number is read as seconds.
    time_delta_type : str
        Stored with the features and used in the output path.
    experiment_type : str
        Stored with the features and used in the output path.
    motion_filter : bool
        Only stored with the features; the output directory is always named
        ``motion-not-filtered``.
    special_filter : str
        Filter type forwarded to ``extract_features_for_signal``.
    verbose : bool
        Print additional information.

    Returns
    -------
    str, None or int
        Path of the written CSV; ``None`` when ``csv`` is False; ``0`` when no
        features could be extracted (no ``T_S_mean`` column).
    """
    with open('../../config.json') as f:
        config = json.load(f)

    today = datetime.datetime.today().strftime('%Y-%m-%d')

    exp_base_path = config['eval']
    figure_path = config['figures']

    if verbose:
        print(exp_base_path)
        print(figure_path)
        print('\n')

    eval_df = pd.read_json(os.path.join(exp_base_path, 'eval_ppg_snippets.json')).rename(
        columns={'patientid':'subject','sbp':'SYS(mmHg)','dbp':'DIA(mmHg)'})
    print("Eval shape: ", eval_df.shape)

    # Create features path if it does not exist
    experiment_tag = experiment_type.replace(' ','')
    run_tag = time_delta.replace(' ','')+'-'+time_delta_type+'-'+str(special_filter).lower()
    features_path = os.path.join('..', '..', 'features', 'eval', today, experiment_tag, run_tag, 'motion-not-filtered')
    os.makedirs(features_path, exist_ok=True)

    print('Extracting Features...')
    print('-----','\n')

    # Named window_ms rather than ``time`` so the time module is not hidden
    window_ms = int(time_delta.split()[0])*1000
    print("Time window (ms): ", window_ms)
    features = extract_features_for_signal(eval_df, time=window_ms, filter_type=special_filter, verbose=verbose)

    if 'T_S_mean' not in features:
        return 0

    if verbose: print('Features: ', features.shape)

    features['time_delta'] = time_delta
    features['time_delta_type'] = time_delta_type
    features['experiment_type'] = experiment_type
    features['motion_filter'] = motion_filter
    features['special_filter'] = special_filter

    # Built regardless of ``csv``: the summary below and the return value
    # used to raise NameError when csv was False
    all_features = features.drop(['ppg'], axis=1)
    all_features_path = None
    if csv:
        all_features_path = features_path+'/all_features_{}_{}.csv'.format(experiment_tag, run_tag)
        all_features.to_csv(all_features_path, index=False)
    if verbose: print(all_features)
    if verbose: print('-----','\n')

    print('Amount of BP-Pairs: ', all_features.shape)
    print('Features Extracted.')
    print('-----','\n')

    return all_features_path

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Return the mean absolute percentage error (in percent) of ``y_pred`` against ``y_true``.

    Both arguments are array-likes of equal length. A zero in ``y_true``
    results in a division by zero.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

def drop_correlation(df, labels, threshold = 0.95, plotcorr = False):
    """Drop columns that are strongly correlated with an earlier column.

    The correlation matrix is computed over all columns not listed in
    ``labels``. A column is dropped when its absolute correlation with any
    column to its left exceeds ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame.
    labels : list
        Column names excluded from the correlation analysis.
    threshold : float
        Absolute correlation above which a column is dropped.
    plotcorr : bool
        When True, draws a heat map of the correlation matrix.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the dropped columns.
    """
    candidates = df.loc[:, ~df.columns.isin(labels)]
    corr = candidates.corr()
    if plotcorr: 
        f, ax = plt.subplots(figsize=(15, 15))
        cmap = sns.diverging_palette(220, 10, as_cmap=True)
        sns.heatmap(corr, cmap = cmap,
                xticklabels=corr.columns.values,
                yticklabels=corr.columns.values)
    # Keep only the part strictly above the diagonal so every pair is looked at once
    above_diagonal = np.triu(np.ones(corr.shape), k=1).astype(bool)
    upper = corr.abs().where(above_diagonal)
    # Columns having at least one correlation above the threshold
    to_drop = []
    for column in upper.columns:
        if (upper[column] > threshold).any():
            to_drop.append(column)
    print("Columns dropped: ", len(to_drop))
    return df.drop(columns = to_drop)

def predict_bp_from_ppg(dataframe, predicted_variable = 'SBP', k = 1, correlation_threshold = 0.95, 
                        random_seed = 42, learning_rate = 0.01, n_estimators = 100, 
                        alpha = 1, l1_ratio = 0.5, random_state = 42, 
                        epochs = 50, batch_size = 5, n_jobs = -1, max_depth = 10, verbose = False):
    """Evaluate several regressors for blood-pressure prediction with leave-k-patients-out folds.

    In every fold ``k`` distinct patients are drawn from the patients not yet
    tested and form the test set; all other patients form the training set.
    Folds are created while more than one untested patient is left. ElasticNet
    (LR), gradient boosting (GBM), LightGBM (LGBM), random forest (RF) and a
    mean-predicting baseline (DUMMY) are fitted per fold and scored with RMSE,
    MAPE and MAE.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Features plus the columns ``SYS(mmHg)``/``SBP``, ``DIA(mmHg)``/``DBP``
        and ``subject``/``patientid``. The caller's frame is not modified.
    predicted_variable : str
        ``'SBP'`` or ``'DBP'``.
    k : int
        Number of patients held out per fold (at least 1).
    correlation_threshold : float
        Forwarded to ``drop_correlation``.
    random_seed, random_state : int
        Seeds; ``random_seed`` also seeds the patient sampling of every fold.
    learning_rate, n_estimators, alpha, l1_ratio, n_jobs, max_depth
        Hyper-parameters of the estimators.
    epochs, batch_size
        Not used by any model; only recorded in the result.
    verbose : bool
        Print additional information.

    Returns
    -------
    dict
        General information (``subjects``, ``bp_pairs``, ``folders``,
        ``mean_train_size``, ``mean_test_size``), ``<METRIC>_<MODEL>_MEAN`` and
        ``<METRIC>_<MODEL>_STD`` over all folds, and the parameters used.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or fewer than two patients are available.
    """
    if k < 1:
        # k == 0 would never remove a patient and loop forever
        raise ValueError("k must be at least 1, got {}".format(k))

    df = dataframe.rename(columns={"SYS(mmHg)": "SBP", "DIA(mmHg)": "DBP", 'subject': 'patientid'})

    # Dropping Null Values
    df.drop(df.loc[(df['SBP'] == 0)|(df['DBP'] == 0)].index, inplace = True)
    df = drop_correlation(df, ['SBP', 'DBP'], correlation_threshold, plotcorr = False)
    print("New Dataframe Shape: " + str(df.shape))
    if verbose: print(df.shape)

    # All columns except patientid, SBP and DBP
    features = df.shape[1]-3
    print("Nr of features: ", features)
    if verbose: print("Columns: ", df.columns)
    # A list, because random.sample requires a sequence
    patient_ids = list(np.unique(df['patientid']))

    def _scaled(step_name, estimator):
        # Every learner is preceded by feature standardisation
        return Pipeline([('standardize', StandardScaler()), (step_name, estimator)])

    # The insertion order defines the order of the result keys
    models = {
        'LR': _scaled('lr', ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=random_state)),
        'GBM': _scaled('gbm', GradientBoostingRegressor(learning_rate=learning_rate, n_estimators=n_estimators, random_state=random_seed)),
        'LGBM': _scaled('lgbm', lgb.LGBMRegressor(learning_rate=learning_rate, n_estimators=n_estimators, random_state=random_seed, n_jobs=n_jobs)),
        'RF': _scaled('rf', RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state, n_jobs=n_jobs)),
        'DUMMY': DummyRegressor(strategy='mean'),
    }
    metric_names = ('RMSE', 'MAPE', 'MAE')
    scores = {name: {metric: [] for metric in metric_names} for name in models}

    results = {}
    folds = 0
    train_rows = 0
    test_rows = 0
    total = len(df.index)
    subjects = len(df['patientid'].unique())

    if verbose: print("BPPairs: ", total)
    if verbose: print("Subjects: ", subjects)
    if verbose: print("\n")

    while len(patient_ids) > 1:
        folds += 1

        # Random Seed
        random.seed(random_seed)

        # Sample WITHOUT replacement: random.choices could draw the same
        # patient several times, giving folds with fewer than k test patients
        patient_test_ids = random.sample(patient_ids, min(k, len(patient_ids)))
        patient_ids = [e for e in patient_ids if e not in patient_test_ids]
        is_test = df['patientid'].isin(patient_test_ids)
        df_test = df.loc[is_test].dropna()
        df_train = df[~is_test].dropna()
        if verbose: print("Running fold" + str(folds))
        if verbose: print("Train: ", df_train.shape)
        train_rows += len(df_train.index)
        if verbose: print("Test: ", df_test.shape)
        if verbose: print("Total: ", len(df_test.index) + len(df_train.index))
        test_rows += len(df_test.index)
        if verbose: print("\n")

        cols_dropped = ['patientid']

        if predicted_variable == 'SBP':
            cols_dropped.append('DBP')
        elif predicted_variable == 'DBP':
            cols_dropped.append('SBP')
        df_train = df_train.drop(columns = cols_dropped)
        df_test = df_test.drop(columns = cols_dropped)

        X_train = df_train.loc[:, df_train.columns != predicted_variable].values
        y_train = df_train[predicted_variable].values
        X_test = df_test.loc[:, df_test.columns != predicted_variable].values
        y_test = df_test[predicted_variable]

        for name, model in models.items():
            model.fit(X = X_train, y = y_train)
            predicted_labels = model.predict(X_test)

            scores[name]['RMSE'].append(np.sqrt(mean_squared_error(y_test, predicted_labels)))
            scores[name]['MAPE'].append(mean_absolute_percentage_error(y_test, predicted_labels))
            scores[name]['MAE'].append(mean_absolute_error(y_test, predicted_labels))

    if folds == 0:
        # Previously surfaced as a ZeroDivisionError in the averages below
        raise ValueError("At least two patients are required to build a fold, got {}".format(subjects))

    # General Info
    results['subjects'] = subjects
    results['bp_pairs'] = total
    results['folders'] = folds
    results['mean_train_size'] = round(train_rows/folds)
    results['mean_test_size'] = round(test_rows/folds)

    # Mean and standard deviation over all folds, per model and metric
    for name in models:
        for suffix, aggregate in (('MEAN', np.mean), ('STD', np.std)):
            for metric in metric_names:
                results['{}_{}_{}'.format(metric, name, suffix)] = aggregate(np.array(scores[name][metric]))

    parameters = {
                    'predicted_variable' : predicted_variable,
                    'correlation_threshold' : correlation_threshold, 
                    'random_seed' :  random_seed,
                    'learning_rate' : learning_rate, 
                    'n_estimators' : n_estimators, 
                    'alpha' : alpha, 
                    'l1_ratio' : l1_ratio,
                    'random_state' : random_state, 
                    'k' : k, 
                    'features' : features, 
                    'epochs' : epochs, 
                    'batch_size' : batch_size,
                    'max_depth' : max_depth,
                    'n_jobs' : n_jobs,
    }    
    results.update(parameters)
    return results

# Extract Features

## JAIME 2021

In [None]:
# Arguments handed to extract_features()
feature_parameters = dict(
    csv=True,
    time_delta='15 seconds',
    time_delta_type='bfill',
    experiment_type='eval',
    special_filter='cheby',
    verbose=False,
)

# Passed to predict_bp_from_ppg() as the correlation limit for dropping features
correlation_threshold = 0.6

# Experiments

In [None]:
# Every key of feature_parameters is a keyword argument of extract_features
path = extract_features(**feature_parameters)

In [None]:
# Show the value returned by extract_features (location of the feature CSV)
print(path)

In [None]:
# Run predict_bp_from_ppg for both targets and for k = 1, 2, 3 held-out patients
print("Predicting for: ", path)
df = pd.read_csv(path)
print(df.shape)

experiments = []
if not df.empty:
    predicted_variables = ['SBP', 'DBP']
    ks = [1, 2, 3]

    # Run-level metadata; constant per file, so the first value is enough
    features = {
                'time_delta' : df['time_delta'].unique()[0],
                'time_delta_type' : df['time_delta_type'].unique()[0],
                'experiment_type' : df['experiment_type'].unique()[0], 
                'motion_filter' : df['motion_filter'].unique()[0],
                'special_filter' : df['special_filter'].unique()[0]
                }

    # The metadata columns are not features and must not reach the models
    df.drop(list(features.keys()), axis=1, inplace=True)
    for variable in predicted_variables:
        for k in ks:
            results = predict_bp_from_ppg(df, predicted_variable = variable, 
                                          k = k, correlation_threshold = correlation_threshold)
            results.update(features)
            experiments.append(results)
else:
    # The message used to be a bare tuple expression and was never printed
    print("Dataframe was empty: ", path)
all_experiments = pd.DataFrame.from_dict(experiments)

In [None]:
# Show the boolean motion_filter flag as 'yes' / 'no'
all_experiments = all_experiments.replace({'motion_filter': {True : 'yes', False: 'no'}})
# Preview of the first two experiments
all_experiments.head(2)

In [None]:
# Store all experiment results in a CSV named after today's date
date = datetime.datetime.today().strftime('%Y-%m-%d')
# Make sure the target directory exists; to_csv does not create it
os.makedirs(os.path.join('..', '..', 'results'), exist_ok=True)
all_experiments.to_csv('../../results/'+date+'_results_eval.csv', index=True, mode='w')

# Exploring the Data

In [None]:
# Reload the results file of today's run (uses the `date` string set when saving).
# The file was written with index=True and is read without index_col, so the
# former index comes back as an additional unnamed column.
results_path = '../../results/'+date+'_results_eval.csv'
all_experiments = pd.read_csv(results_path)

In [None]:
def _model_view(tag):
    """Return the result columns of one model, sorted so that the lowest MAE comes first per group."""
    mae_mean = 'MAE_' + tag + '_MEAN'
    selected = ['predicted_variable', 'experiment_type',
                mae_mean, 'MAE_' + tag + '_STD', 'MAPE_' + tag + '_MEAN', 'RMSE_' + tag + '_MEAN',
                'special_filter', 'time_delta', 'time_delta_type', 'motion_filter', 'k']
    ordering = ['predicted_variable', 'experiment_type', 'k', mae_mean]
    return all_experiments[selected].sort_values(by=ordering)

gbm = _model_view('GBM')
lgbm = _model_view('LGBM')
lr = _model_view('LR')
rf = _model_view('RF')
d = _model_view('DUMMY')

#### GBM

In [None]:
# GBM, SBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_GBM_MEAN
gbm.loc[(gbm['experiment_type'] == 'eval') & (gbm['predicted_variable'] == 'SBP') & (gbm['k'] == 1)].head(1)

In [None]:
# GBM, DBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_GBM_MEAN
gbm.loc[(gbm['experiment_type'] == 'eval') & (gbm['predicted_variable'] == 'DBP') & (gbm['k'] == 1)].head(1)

In [None]:
# GBM metrics averaged per target and experiment type. numeric_only=True is
# needed on pandas >= 2.0, where the string columns would otherwise raise.
gbm.groupby(['predicted_variable','experiment_type']).mean(numeric_only=True)

#### LR

In [None]:
# LR, SBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_LR_MEAN
lr.loc[(lr['experiment_type'] == 'eval') & (lr['predicted_variable'] == 'SBP') & (lr['k'] == 1)].head(1)

In [None]:
# LR, DBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_LR_MEAN
lr.loc[(lr['experiment_type'] == 'eval') & (lr['predicted_variable'] == 'DBP') & (lr['k'] == 1)].head(1)

In [None]:
# LR metrics averaged per target and experiment type. numeric_only=True is
# needed on pandas >= 2.0, where the string columns would otherwise raise.
lr.groupby(['predicted_variable','experiment_type']).mean(numeric_only=True)

#### LGBM

In [None]:
# LGBM, SBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_LGBM_MEAN
lgbm.loc[(lgbm['experiment_type'] == 'eval') & (lgbm['predicted_variable'] == 'SBP') & (lgbm['k'] == 1)].head(1)

In [None]:
# LGBM, DBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_LGBM_MEAN
lgbm.loc[(lgbm['experiment_type'] == 'eval') & (lgbm['predicted_variable'] == 'DBP') & (lgbm['k'] == 1)].head(1)

In [None]:
# LGBM metrics averaged per target and experiment type. This cell used to
# aggregate the GBM view by mistake. numeric_only=True is needed on
# pandas >= 2.0, where the string columns would otherwise raise.
lgbm.groupby(['predicted_variable','experiment_type']).mean(numeric_only=True)

#### RF

In [None]:
# RF, SBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_RF_MEAN
rf.loc[(rf['experiment_type'] == 'eval') & (rf['predicted_variable'] == 'SBP') & (rf['k'] == 1)].head(1)

In [None]:
# RF, DBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_RF_MEAN
rf.loc[(rf['experiment_type'] == 'eval') & (rf['predicted_variable'] == 'DBP') & (rf['k'] == 1)].head(1)

In [None]:
# RF metrics averaged per target and experiment type. numeric_only=True is
# needed on pandas >= 2.0, where the string columns would otherwise raise.
rf.groupby(['predicted_variable','experiment_type']).mean(numeric_only=True)

#### DUMMY

In [None]:
# Dummy baseline, SBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_DUMMY_MEAN
d.loc[(d['experiment_type'] == 'eval') & (d['predicted_variable'] == 'SBP')& (d['k'] == 1)].head(1)

In [None]:
# Dummy baseline, DBP, k = 1: first row of the MAE-sorted view, i.e. the lowest MAE_DUMMY_MEAN
d.loc[(d['experiment_type'] == 'eval') & (d['predicted_variable'] == 'DBP') & (d['k'] == 1)].head(1)

In [None]:
# Dummy-baseline metrics averaged per target and experiment type. numeric_only=True
# is needed on pandas >= 2.0, where the string columns would otherwise raise.
d.groupby(['predicted_variable','experiment_type']).mean(numeric_only=True)

### Best DBP & SBP JAIME 2021


In [None]:
# Columns identifying one experiment; used as join key below
_best_keys = ['predicted_variable','experiment_type','k']

def _best_eval_row(view, tag, variable):
    """Return the first (lowest-MAE) k = 1 'eval' row of a model view for one target."""
    wanted = (view['experiment_type'] == 'eval') & (view['predicted_variable'] == variable) & (view['k'] == 1)
    return view.loc[wanted].head(1)[_best_keys + ['MAE_' + tag + '_MEAN', 'MAE_' + tag + '_STD']]

# Best diastolic result per model
gbm_min_eval_dbp = _best_eval_row(gbm, 'GBM', 'DBP')
lgbm_min_eval_dbp = _best_eval_row(lgbm, 'LGBM', 'DBP')
rf_min_eval_dbp = _best_eval_row(rf, 'RF', 'DBP')
lr_min_eval_dbp = _best_eval_row(lr, 'LR', 'DBP')
dummy_min_eval_dbp = _best_eval_row(d, 'DUMMY', 'DBP')

# One row with the MAE mean/std of all models side by side
min_eval_dbp = gbm_min_eval_dbp.set_index(_best_keys)
for _other in (lgbm_min_eval_dbp, rf_min_eval_dbp, lr_min_eval_dbp, dummy_min_eval_dbp):
    min_eval_dbp = min_eval_dbp.join(_other.set_index(_best_keys), on=_best_keys)
display(min_eval_dbp)

# Best systolic result per model
gbm_min_eval_sbp = _best_eval_row(gbm, 'GBM', 'SBP')
lgbm_min_eval_sbp = _best_eval_row(lgbm, 'LGBM', 'SBP')
rf_min_eval_sbp = _best_eval_row(rf, 'RF', 'SBP')
lr_min_eval_sbp = _best_eval_row(lr, 'LR', 'SBP')
dummy_min_eval_sbp = _best_eval_row(d, 'DUMMY', 'SBP')

min_eval_sbp = gbm_min_eval_sbp.set_index(_best_keys)
for _other in (lgbm_min_eval_sbp, rf_min_eval_sbp, lr_min_eval_sbp, dummy_min_eval_sbp):
    min_eval_sbp = min_eval_sbp.join(_other.set_index(_best_keys), on=_best_keys)
display(min_eval_sbp)

# DBP row first, SBP row second
best_results = pd.concat([min_eval_dbp, min_eval_sbp], axis=0)
display(best_results)

In [None]:
# Store the best result per target in a CSV named after today's date
date = datetime.datetime.today().strftime('%Y-%m-%d')
# Make sure the target directory exists; to_csv does not create it
os.makedirs(os.path.join('..', '..', 'results'), exist_ok=True)
best_results.to_csv('../../results/'+date+'_best_results_eval.csv', index=True, mode='w')