In [1]:
import pandas as pd
import pickle
import os
import numpy as np
# Earlier single-file load, left disabled (the cells below read per-file pickles instead):
# data = pickle.load(open('../data/leftppgecg.p','rb'))
# Folder that contains the 'ecg_ppg_25_left.zip' archive; relative to the notebook's working directory.
directory = '../data_users/'

In [2]:
import zipfile
# Unpack 'ecg_ppg_25_left.zip' into a sibling folder of the same name.
# NOTE(review): this cell only works while `directory` is still '../data_users/'.
# A later cell rebinds `directory` to the extracted folder, so re-running this
# cell after that point will look for the archive in the wrong place.
with zipfile.ZipFile(directory+'ecg_ppg_25_left.zip', 'r') as zip_ref:
    zip_ref.extractall(directory+'ecg_ppg_25_left')

In [2]:
# Point `directory` at the extracted archive contents for the cells that follow.
# NOTE(review): this overwrites the value assigned in the first cell, so cell order
# matters -- the extraction cell must not be re-run after this one.
directory = '../data_users/ecg_ppg_25_left/'

In [3]:
# Load the one recording analysed in this notebook and report its shape before
# and after dropping rows that contain NaNs.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
for d in os.listdir(directory):
    # Every other directory entry is skipped (the target name already ends in
    # '.p', so no separate extension check is needed).
    if d != '0c726695-f016-4019-9aab-c292298ee10c.p':
        continue
    # Context manager closes the file handle instead of leaking it.
    with open(directory+d,'rb') as pickle_file:
        data = pickle.load(pickle_file)
    print(data.shape,d,data.dropna().shape)

(19842, 16) 0c726695-f016-4019-9aab-c292298ee10c.p (7430, 16)


In [117]:
from scipy.stats import iqr
from datetime import datetime
from copy import deepcopy
import math
from scipy.stats import pearsonr

def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted standard deviation.

    values, weights -- Numpy ndarrays with the same shape.

    Returns a (mean, std) tuple; std is the square root of the weighted
    average squared deviation from the weighted mean.
    """
    mean = np.average(values, weights=weights)
    squared_deviation = (values - mean) ** 2
    return mean, math.sqrt(np.average(squared_deviation, weights=weights))

def get_rr_features(a):
    """
    Summarise an array of RR values as a 7-element feature vector.

    Order of the returned Numpy array:
    variance, inter-quartile range, mean, median, 80th percentile,
    20th percentile, 60000 / median.
    (The last entry is a rate per minute if `a` is in milliseconds --
    presumably the case, TODO confirm.)
    """
    median_rr = np.median(a)
    spread = [np.var(a), iqr(a)]
    centre = [np.mean(a), median_rr]
    tails = [np.percentile(a, 80), np.percentile(a, 20)]
    return np.array(spread + centre + tails + [60000 / median_rr])

def get_quality_features(a):
    """
    Summarise an array of likelihood values as an 8-element feature vector.

    Order of the returned Numpy array:
    mean, median, 75th percentile, 25th percentile, then the number of
    entries strictly above 0, .25, .5 and .75, each divided by 60.
    (60 is presumably the nominal number of entries per window -- TODO confirm.)
    """
    location = [np.mean(a), np.median(a), np.percentile(a, 75), np.percentile(a, 25)]
    # Count of entries above each threshold, scaled by the fixed divisor 60.
    coverage = [np.count_nonzero(a > threshold) / 60 for threshold in (0, .25, .5, .75)]
    return np.array(location + coverage)

def get_daywise(data):
    """
    Split a DataFrame into one frame per ('user', 'day') pair.

    Only groups with more than 60 rows left after dropping rows that contain
    NaNs are kept; the kept frames themselves still include their NaN rows.
    Returns a list of DataFrames.
    """
    usable_days = []
    for _, day_frame in data.groupby(['user','day']):
        complete_rows = day_frame.dropna().shape[0]
        if complete_rows > 60:
            usable_days.append(day_frame)
    return usable_days

def parse_day_data(data_day):
    """
    Prepare one day's rows for feature extraction.

    The frame is sorted by 'ltime' and re-indexed (the input frame is not
    modified). For every row:
      * 'likelihood_max_array' and 'rr_array' are squeezed
        (assumes each squeezes to a 2-D array whose axis 1 indexes the
        competing candidates -- TODO confirm);
      * 'likelihood'     = maximum along axis 1 of the likelihood array;
      * 'likelihood_ind' = position of that maximum along axis 1;
      * 'time'           = 'ltime' converted with datetime.timestamp;
      * 'rr'             = the 'rr_array' entry at the winning position of each row of the array;
      * 'rr_col'         = 'rr' and 'likelihood' stacked as two columns.

    Returns the new DataFrame.
    """
    data_day = data_day.sort_values('ltime').reset_index(drop=True)

    squeezed_likelihood = data_day['likelihood_max_array'].apply(lambda arr: np.squeeze(arr))
    data_day['likelihood_max_array'] = squeezed_likelihood
    data_day['likelihood'] = squeezed_likelihood.apply(lambda arr: np.max(arr, axis=1))
    data_day['likelihood_ind'] = squeezed_likelihood.apply(lambda arr: np.argmax(arr, axis=1))
    data_day['rr_array'] = data_day['rr_array'].apply(lambda arr: np.squeeze(arr))
    data_day['time'] = data_day['ltime'].apply(lambda stamp: datetime.timestamp(stamp))

    # For each window, walk its rows and pick the value at the winning position.
    selected_rr = []
    winners_per_window = data_day['likelihood_ind'].values
    for window_no, candidates in enumerate(data_day['rr_array'].values):
        winners = winners_per_window[window_no]
        picked = []
        for row_no in range(candidates.shape[0]):
            picked.append(candidates[row_no, winners[row_no]])
        selected_rr.append(np.squeeze(np.array(picked)))
    data_day['rr'] = selected_rr

    def _pair_rr_with_likelihood(row):
        # Two-column array: column 0 = rr, column 1 = likelihood.
        return np.vstack([np.squeeze(row['rr']), np.squeeze(row['likelihood'])]).T

    data_day['rr_col'] = data_day.apply(_pair_rr_with_likelihood, axis=1)
    return data_day

def remove_3sd(heart_rate_window):
    """
    Drop rows whose value lies three or more weighted standard deviations from the weighted mean.

    heart_rate_window -- 2-D Numpy ndarray; column 0 holds the values to filter
                         and column 1 the likelihood used as the weight.

    The weighted mean and standard deviation are estimated only from rows
    with likelihood > .25. When no row clears that threshold the statistics
    are undefined (np.average would raise ZeroDivisionError on zero total
    weight), so outlier filtering is skipped for that window.

    Returns a two-item list [window, status]:
      [filtered window, 'Available']      when more than 10 rows remain;
      [copy of the input, 'Not Available'] otherwise.

    Unlike the previous bare `except: pass`, unexpected errors (for example a
    window that is not 2-D) are no longer silently swallowed.
    """
    original_window = heart_rate_window
    reliable = heart_rate_window[:,1] > .25
    if reliable.any():
        r,tt = weighted_avg_and_std(heart_rate_window[reliable,0],heart_rate_window[reliable,1])
        # Strict inequalities: if tt is 0 (e.g. a single reliable row) no row
        # passes, and the window is reported as 'Not Available' below.
        within_band = (heart_rate_window[:,0]<r+3*tt)&(heart_rate_window[:,0]>r-3*tt)
        heart_rate_window = heart_rate_window[within_band]
    if heart_rate_window.shape[0]>10:
        return [heart_rate_window,'Available']
    # Copy only on the path that actually hands the untouched input back.
    return [deepcopy(original_window),'Not Available']

    
def parse_for_features(data_day):
    """
    Clean each row's 'rr_col' array and derive per-row features.

    Steps, applied to every row of data_day (the frame is modified in place
    and also returned):
      1. keep only entries with likelihood > .05 and value strictly between
         300 and 1500;
      2. pass the result through remove_3sd, storing its status string in
         'indicator' and its array in 'rr_col';
      3. split 'rr_col' back into 'rr' (column 0) and 'likelihood' (column 1);
      4. compute 'rr_features', 'quality_features' and 'quality_mag'
         (root of the sum of squared quality features divided by 8).
    """
    def _keep_plausible(rr_lik):
        plausible = (rr_lik[:,1]>.05)&(rr_lik[:,0]>300)&(rr_lik[:,0]<1500)
        return rr_lik[np.where(plausible)[0]]

    # Each outcome is the [window, status] pair returned by remove_3sd.
    outcomes = data_day['rr_col'].apply(_keep_plausible).apply(remove_3sd)
    data_day['indicator'] = outcomes.apply(lambda pair: pair[1])
    data_day['rr_col'] = outcomes.apply(lambda pair: pair[0])

    data_day['likelihood'] = data_day['rr_col'].apply(lambda window: window[:,1])
    data_day['rr'] = data_day['rr_col'].apply(lambda window: window[:,0])

    data_day['rr_features'] = data_day['rr'].apply(get_rr_features)
    data_day['quality_features'] = data_day['likelihood'].apply(get_quality_features)
    data_day['quality_mag'] = data_day['quality_features'].apply(
        lambda feats: np.sqrt(np.sum(np.square(feats))/8))
    return data_day

def get_stress(data_day,stress_model):
    """
    Score every row of a day with the stress model.

    data_day     -- DataFrame with 'rr_features' (one equal-length feature
                    vector per row) and 'quality_mag' (one scalar per row).
    stress_model -- object exposing predict_proba(feature_matrix); column 1 of
                    its output is stored (presumably the probability of the
                    'stressed' class -- TODO confirm against the model).

    Each feature column is standardised with the mean and standard deviation
    weighted by 'quality_mag' before prediction.

    Adds the column 'stress_likelihood1' to data_day in place and returns it.
    """
    # Read from the argument; the previous version used the notebook global `a`,
    # which silently scored the wrong frame whenever the two differed.
    feature_matrix = np.array(list(data_day['rr_features'].values))
    quals1 = np.array(list(data_day['quality_mag'].values))
    for i in range(feature_matrix.shape[1]):
        m,s = weighted_avg_and_std(feature_matrix[:,i], quals1)
        # A constant feature has s == 0; dividing by it would feed NaN/inf to
        # the model, so leave such a column centred but unscaled.
        scale = s if s > 0 else 1.0
        feature_matrix[:,i]  = (feature_matrix[:,i] - m)/scale
    stress_likelihood = stress_model.predict_proba(feature_matrix)[:,1]
    data_day['stress_likelihood1'] = stress_likelihood
    return data_day

def get_corr(data_day):
    """
    Pearson correlations between the stress-likelihood columns, per quality bin.

    Rows containing NaNs are dropped, then rows are binned by
    round(100 * quality_mag) // 10. Bins with 20 rows or fewer are skipped.

    Returns a 2-D Numpy array with one row per kept bin and four columns:
      [bin,
       corr('stress_likelihood_ecg', 'stress_likelihood'),
       corr('stress_likelihood_ecg', 'stress_likelihood1'),
       corr('stress_likelihood',     'stress_likelihood1')]
    The shape is (0, 4) when no bin qualifies. The input frame is not modified.
    """
    complete = data_day.dropna()
    # assign() builds a new frame, so no column is written onto the dropna()
    # result (that write is what raised SettingWithCopyWarning before).
    complete = complete.assign(
        quality_mag_1=complete['quality_mag'].apply(lambda a:np.round(100*a)//10))
    rows = []
    for _, df in complete.groupby('quality_mag_1'):
        if df.shape[0] <= 20:
            continue
        ecg = df['stress_likelihood_ecg'].values
        base = df['stress_likelihood'].values
        rescored = df['stress_likelihood1'].values
        rows.append(np.array([df['quality_mag_1'].values[0],
                              pearsonr(ecg,base)[0],
                              pearsonr(ecg,rescored)[0],
                              pearsonr(base,rescored)[0]]))
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows).reshape(-1, 4)
    



# Run the whole pipeline on the first ('user', 'day') group of the loaded
# recording: parse -> clean/featurise -> score with the stress model -> correlate.
data_all = get_daywise(data)
a = data_all[0]
a = parse_day_data(a)
a = parse_for_features(a)
# Context manager closes the model file instead of leaking the handle.
# NOTE: pickle.load can execute arbitrary code -- only load trusted model files.
with open('../models/stress.p','rb') as model_file:
    stress_model = pickle.load(model_file)
a = get_stress(a,stress_model)
all_corr = get_corr(a)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [118]:
# One row per quality bin: [bin, then the three pairwise Pearson correlations computed in get_corr].
all_corr

array([[ 2.        , -0.26665371, -0.29522191,  0.68420255],
       [ 3.        ,  0.03016076,  0.01954035,  0.67383293],
       [ 4.        , -0.0204236 ,  0.01441313,  0.74303852],
       [ 5.        ,  0.41676042,  0.60915975,  0.21845877]])

In [108]:
# Side-by-side view of the quality magnitude and the three stress-likelihood columns, complete rows only.
a[['quality_mag','stress_likelihood','stress_likelihood_ecg','stress_likelihood1']].dropna()

Unnamed: 0,quality_mag,stress_likelihood,stress_likelihood_ecg,stress_likelihood1
0,0.370911,0.201899,0.033323,0.084747
1,0.361321,0.006499,0.021960,0.140392
2,0.328478,0.171370,0.040796,0.172813
3,0.424161,0.423396,0.029207,0.161418
4,0.395056,0.017021,0.067507,0.097283
...,...,...,...,...
706,0.594561,0.480980,0.114348,0.157681
707,0.666574,0.311742,0.108311,0.175355
708,0.553743,0.521317,0.126435,0.105184
709,0.644743,0.470700,0.903775,0.211244


In [51]:
# Number of rows in the parsed day (the 'time' column is created by parse_day_data).
a['time'].shape

(829,)