In [1]:
import math
import os
import pickle

import numpy as np
import pandas as pd
# data = pickle.load(open('../data/leftppgecg.p','rb'))
# Root folder (relative to the notebook) that holds the per-user data archive.
# The trailing slash matters: paths below are built by plain string concatenation.
directory = '../data_users/'

In [2]:
import zipfile
# Unpack the participant archive into a sibling folder that carries the same
# base name as the zip file.
archive_path = directory+'ecg_ppg_25_left.zip'
extract_to = directory+'ecg_ppg_25_left'
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_to)

In [2]:
# Re-point `directory` at the extracted archive folder.
# NOTE(review): this rebinds the name used by the zip-extraction cell, so
# re-running that cell after this one would build its paths from the new value.
directory = '../data_users/ecg_ppg_25_left/'

In [3]:
# Load the single participant file that the rest of the notebook analyses and
# print its shape before and after dropping rows that contain missing values.
# NOTE: `data` deliberately stays bound after the loop; a later cell reads it.
for d in os.listdir(directory):
    # The allow-listed name already ends in '.p', so the membership test alone
    # is enough to skip every other directory entry.
    if d not in ['0c726695-f016-4019-9aab-c292298ee10c.p']:
        continue
    # pickle can execute arbitrary code while loading -- only open trusted files.
    # The context manager closes the handle, which the bare open() never did.
    with open(directory+d,'rb') as pickle_file:
        data = pickle.load(pickle_file)
    print(data.shape,d,data.dropna().shape)

(19842, 16) 0c726695-f016-4019-9aab-c292298ee10c.p (7430, 16)


In [93]:
from scipy.stats import iqr
from datetime import datetime
from copy import deepcopy

def weighted_avg_and_std(values, weights):
    """
    Return the weighted average and the weighted standard deviation.

    values, weights -- Numpy ndarrays with the same shape.

    Returns a tuple ``(average, std)`` where ``std`` is the square root of the
    weighted mean squared deviation from ``average`` (a population-style
    estimate, no degrees-of-freedom correction).

    Raises whatever ``np.average`` raises for unusable weights, e.g.
    ZeroDivisionError when the weights sum to zero (which includes empty input).
    """
    average = np.average(values, weights=weights)
    # Fast and numerically precise:
    variance = np.average((values-average)**2, weights=weights)
    # `math` has to be imported at the top of the notebook; without it this
    # line raises NameError on every call.
    return average, math.sqrt(variance)

def get_rr_features(a):
    """
    Summarise one window of RR values as a 7-element feature vector.

    Order of the returned array: variance, inter-quartile range, mean, median,
    80th percentile, 20th percentile, and 60000 divided by the median
    (presumably a heart rate in beats per minute if `a` is in milliseconds --
    TODO confirm the unit).
    """
    median_rr = np.median(a)
    upper_pct = np.percentile(a,80)
    lower_pct = np.percentile(a,20)
    feature_vector = [
        np.var(a),
        iqr(a),
        np.mean(a),
        median_rr,
        upper_pct,
        lower_pct,
        60000/median_rr,
    ]
    return np.array(feature_vector)

def get_quality_features(a):
    """
    Summarise one window of likelihood values as an 8-element feature vector.

    Order of the returned array: mean, median, 75th percentile, 25th
    percentile, then the number of entries strictly above 0, .25, .5 and .75,
    each divided by 60 (presumably the nominal number of samples per window --
    TODO confirm).
    """
    summary = [np.mean(a), np.median(a), np.percentile(a,75), np.percentile(a,25)]
    # Share of a 60-slot window that clears each likelihood cut-off.
    coverage = [np.count_nonzero(a>cutoff)/60 for cutoff in (0, .25, .5, .75)]
    return np.array(summary+coverage)

def get_daywise(data):
    """
    Split `data` into one DataFrame per (user, day) pair.

    A group is kept only if more than 60 of its rows survive `dropna()`;
    the kept frames themselves are returned unfiltered (NaN rows included).
    """
    day_frames = []
    for _, day_frame in data.groupby(['user','day']):
        complete_rows = day_frame.dropna().shape[0]
        if complete_rows>60:
            day_frames.append(day_frame)
    return day_frames

def parse_day_data(data_day):
    """
    Derive per-row RR series and likelihoods for one day of data.

    Works on a time-sorted, re-indexed copy of `data_day` (ordered by 'ltime')
    and returns it with these columns added or replaced:

    likelihood_max_array -- the original arrays with singleton axes squeezed out
    likelihood           -- row-wise maximum of likelihood_max_array (axis 1)
    likelihood_ind       -- row-wise argmax of likelihood_max_array (axis 1)
    rr_array             -- the original arrays with singleton axes squeezed out
    time                 -- 'ltime' converted with datetime.timestamp
    rr                   -- for every row of rr_array, the entry at the argmax column
    rr_col               -- rr and likelihood stacked as two columns (rr first)
    """
    frame = data_day.sort_values('ltime').reset_index(drop=True)

    squeezed_likelihood = frame['likelihood_max_array'].apply(lambda arr: np.squeeze(arr))
    frame['likelihood_max_array'] = squeezed_likelihood
    frame['likelihood'] = squeezed_likelihood.apply(lambda arr: np.max(arr,axis=1))
    frame['likelihood_ind'] = squeezed_likelihood.apply(lambda arr: np.argmax(arr,axis=1))
    frame['rr_array'] = frame['rr_array'].apply(lambda arr: np.squeeze(arr))
    # datetime.timestamp is called unbound on purpose so the conversion is the
    # standard-library one, whatever datetime subclass 'ltime' holds.
    frame['time'] = frame['ltime'].apply(lambda stamp: datetime.timestamp(stamp))

    # For each window keep, per row, the RR candidate with the highest likelihood.
    selected = []
    for best_columns, candidates in zip(frame['likelihood_ind'].values, frame['rr_array'].values):
        picks = [candidates[row,best_columns[row]] for row in range(candidates.shape[0])]
        selected.append(np.squeeze(np.array(picks)))
    frame['rr'] = selected

    def _pair_rr_with_likelihood(row):
        stacked = np.vstack([np.squeeze(row['rr']),np.squeeze(row['likelihood'])])
        return stacked.T

    frame['rr_col'] = frame.apply(_pair_rr_with_likelihood, axis=1)
    return frame

def remove_3sd(heart_rate_window):
    """
    Drop rows whose first column lies 3 or more weighted standard deviations
    away from the weighted mean.

    heart_rate_window -- 2-D ndarray; column 0 holds the values to screen and
    column 1 the per-row likelihood used as weight.

    The mean and standard deviation are computed only from rows whose
    likelihood exceeds .25, but the +/- 3 SD band is applied to every row.

    Returns a two-element list:
      [filtered_window, 'Available']      if more than 10 rows remain, or
      [copy_of_input,   'Not Available']  otherwise (the untouched input is
                                          handed back instead of the short
                                          filtered window).
    """
    untouched = deepcopy(heart_rate_window)
    try:
        confident = heart_rate_window[:,1]>.25
        center,spread = weighted_avg_and_std(heart_rate_window[confident,0],
                                             heart_rate_window[confident,1])
        values = heart_rate_window[:,0]
        within_band = (values<center+3*spread)&(values>center-3*spread)
        heart_rate_window = heart_rate_window[within_band]
    except Exception:
        # Best effort: if the statistics cannot be computed (for instance no
        # row clears the likelihood cut-off, so the weights sum to zero) the
        # window is left unfiltered. Catching Exception rather than using a
        # bare `except` lets KeyboardInterrupt / SystemExit propagate.
        pass
    if heart_rate_window.shape[0]>10:
        return [heart_rate_window,'Available']
    return [untouched,'Not Available']

    
def parse_for_features(data_day):
    """
    Clean each window's (rr, likelihood) pairs and attach feature columns.

    `data_day` must already carry an 'rr_col' column of 2-column arrays
    (column 0 = rr, column 1 = likelihood). The frame is modified in place and
    also returned. Steps per row:

    1. keep pairs with likelihood > .05 and 300 < rr < 1500;
    2. run remove_3sd, storing its status string in 'indicator' and its array
       back in 'rr_col';
    3. split 'rr_col' into 'rr' and 'likelihood';
    4. compute 'rr_features', 'quality_features' and 'quality_mag' (square
       root of the sum of squared quality features divided by 8).
    """
    def _plausible_pairs(pairs):
        rr_values, confidence = pairs[:,0], pairs[:,1]
        keep = (confidence>.05)&(rr_values>300)&(rr_values<1500)
        return pairs[np.where(keep)[0]]

    data_day['rr_col'] = data_day['rr_col'].apply(_plausible_pairs)

    # remove_3sd hands back [array, status]; unpack both halves into columns.
    screened = data_day['rr_col'].apply(lambda pairs: remove_3sd(pairs))
    data_day['indicator'] = screened.apply(lambda result: result[1])
    data_day['rr_col'] = screened.apply(lambda result: result[0])

    data_day['likelihood'] = data_day['rr_col'].apply(lambda pairs: pairs[:,1])
    data_day['rr'] = data_day['rr_col'].apply(lambda pairs: pairs[:,0])

    data_day['rr_features'] = data_day['rr'].apply(lambda rr: get_rr_features(rr))
    data_day['quality_features'] = data_day['likelihood'].apply(
        lambda likelihood: get_quality_features(likelihood))
    data_day['quality_mag'] = data_day['quality_features'].apply(
        lambda feats: np.sqrt(np.sum(np.square(feats))/8))
    return data_day

# Run the feature pipeline on the first (user, day) group.
# NOTE: `data` is whatever the file-loading loop in an earlier cell left bound.
data_all = get_daywise(data)
a = data_all[0]
a = parse_day_data(a)
a = parse_for_features(a)

# FIXME: '../models/' ends with a path separator, so it names a directory and
# not a model file -- append the actual pickle file name here. Until then the
# load is skipped instead of aborting the cell.
stress_model_path = '../models/'
if os.path.isfile(stress_model_path):
    # Pickles must be read in binary mode, and the handle is closed on exit.
    # pickle can execute arbitrary code while loading -- only open trusted files.
    with open(stress_model_path,'rb') as model_file:
        stress_model = pickle.load(model_file)
else:
    stress_model = None
    print('stress model not loaded: no file at',stress_model_path)

In [94]:
# Quality feature vector of the first window and its RMS magnitude.
a.loc[0,'quality_features'],a.loc[0,'quality_mag']

(array([0.33755972, 0.29083333, 0.52175   , 0.14283333, 0.66666667,
        0.35      , 0.2       , 0.05      ]),
 0.3709109498754514)

In [103]:
# Stack the per-window RR feature vectors into one 2-D matrix and show its shape.
np.array(a['rr_features'].tolist()).shape

(829, 7)

In [51]:
# Number of windows for this day (one timestamp per row).
a.loc[:,'time'].shape

(829,)