In [14]:
from scipy.stats import iqr,skew,kurtosis
from datetime import datetime
from copy import deepcopy
import math
from scipy.stats import pearsonr
from sklearn.externals.joblib import Parallel,delayed
import warnings
import pandas as pd
import pickle
import os
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Suppress every warning for the rest of the session. Note that this also hides
# warnings emitted by pandas/NumPy inside the processing functions below.
warnings.filterwarnings('ignore')

def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted standard deviation.

    values, weights -- Numpy ndarrays with the same shape.

    Returns a tuple (mean, std) where std is the square root of the weighted
    average of the squared deviations from the weighted mean.
    """
    mean = np.average(values, weights=weights)
    squared_deviation = (values - mean) ** 2
    spread = math.sqrt(np.average(squared_deviation, weights=weights))
    return mean, spread

def get_rr_features(a):
    """
    Build the seven-element feature vector for a 1-D sample `a`.

    Order of the returned array: variance, inter-quartile range, mean, median,
    80th percentile, 20th percentile, and 60000 divided by the median.
    NOTE(review): the last entry presumably converts a median RR interval in
    milliseconds to beats per minute -- confirm the unit with the callers.
    """
    centre = np.median(a)
    features = [
        np.var(a),
        iqr(a),
        np.mean(a),
        centre,
        np.percentile(a, 80),
        np.percentile(a, 20),
        60000 / centre,
    ]
    return np.array(features)


def get_weighted_rr_features(a):
    """
    Build the seven-element feature vector with per-sample weights.

    `a` is a 2-D array whose first column holds the values and whose second
    column holds the weights. Each value is repeated round(100 * weight)
    times and the statistics are then taken over the expanded sample, in the
    order: variance, inter-quartile range, mean, median, 80th percentile,
    20th percentile, and 60000 divided by the median.
    """
    repeat_counts = np.int64(np.round(100 * a[:, 1]))
    expanded = np.repeat(a[:, 0], repeat_counts)
    centre = np.median(expanded)
    features = [
        np.var(expanded),
        iqr(expanded),
        np.mean(expanded),
        centre,
        np.percentile(expanded, 80),
        np.percentile(expanded, 20),
        60000 / centre,
    ]
    return np.array(features)

# def get_all_features(a):
#     try:
#         orig = a.shape[0]
#         feature_availability = [len(a)]
#         feature_qual = [np.median(a[:,1]),np.percentile(a[:,1],80),np.percentile(a[:,1],20),iqr(a[:,1]),np.std(a[:,1]),skew(a[:,1]),kurtosis(a[:,1])]
#         feature_activity = [np.median(a[:,2]),np.percentile(a[:,2],80),np.percentile(a[:,2],20),iqr(a[:,2]),np.std(a[:,2]),skew(a[:,2]),kurtosis(a[:,2])]
#         a = a[np.where((a[:,0]>300)&(a[:,0]<1500)&(a[:,1]>.15))[0],:]
#         if len(a)<3:
#             return np.array([0]*30)
#         feature_qual+=[np.median(a[:,1]),np.percentile(a[:,1],80),np.percentile(a[:,1],20),iqr(a[:,1]),np.std(a[:,1]),skew(a[:,1]),kurtosis(a[:,1])]
#         feature_activity+=[np.median(a[:,2]),np.percentile(a[:,2],80),np.percentile(a[:,2],20),iqr(a[:,2]),np.std(a[:,2]),skew(a[:,2]),kurtosis(a[:,2])]
#         feature_availability += [len(a)]
# #         print(np.array(feature_activity+feature_availability+feature_qual))
#         return np.array(feature_activity+feature_availability+feature_qual)
#     except Exception as e:
#         print(e)
#         return np.array([0]*30)

def get_quality_features(a):
    """
    Summarise an array of quality/likelihood values as four numbers.

    Returns, in order: the 50th percentile, the mean, the number of entries
    above .2 divided by 60, and the number of entries above .6 divided by 60.
    """
    above_low = a[a > .2]
    above_high = a[a > .6]
    summary = [
        np.percentile(a, 50),
        np.mean(a),
        len(above_low) / 60,
        len(above_high) / 60,
    ]
    return np.array(summary)

def get_daywise(data):
    """
    Split `data` into one frame per (user, day) pair.

    A group is kept only when more than 120 of its rows have both
    'likelihood_max_array' and 'rr_array' present (non-null).
    """
    required = ['likelihood_max_array','rr_array']
    kept = []
    for _, day_frame in data.groupby(['user','day'],as_index=False):
        usable_rows = day_frame[required].dropna().shape[0]
        if usable_rows > 120:
            kept.append(day_frame)
    return kept

def parse_day_data(data_day):
    """
    Derive per-row likelihood, RR and timestamp columns for one day of data.

    The passed frame is modified in place up to the length filter (the two
    array columns are overwritten with their reshaped versions and
    'likelihood', 'likelihood_ind' and 'length' are added); the filtered frame
    with the extra columns 'time', 'rr' and 'rr_col' is returned.

    NOTE(review): assumes 'ltime' holds datetime objects and that 'activity'
    has one value per row of 'rr_array' -- confirm against the producer of
    this frame.
    """
    # Force both array columns into 2-D arrays with three columns per row.
    data_day['likelihood_max_array'] = data_day['likelihood_max_array'].apply(lambda a:np.squeeze(a).reshape(-1,3))
    # Row-wise maximum likelihood and the column index at which it occurs.
    data_day['likelihood'] = data_day['likelihood_max_array'].apply(lambda a:np.max(a,axis=1))
    data_day['likelihood_ind'] = data_day['likelihood_max_array'].apply(lambda a:np.argmax(a,axis=1))
    data_day['rr_array'] = data_day['rr_array'].apply(lambda a:np.squeeze(a).reshape(-1,3))
    data_day['length'] = data_day['rr_array'].apply(lambda a:a.shape[0])
    # Keep only entries whose reshaped rr_array has more than 20 rows.
    data_day = data_day[data_day.length>20]
    # POSIX timestamp of the 'ltime' value.
    data_day['time'] = data_day['ltime'].apply(lambda a:datetime.timestamp(a))
    indexes = data_day['likelihood_ind'].values
    rr_arrays = data_day['rr_array'].values
    rrs = []
    for i,rr in enumerate(rr_arrays):
        index = indexes[i]
        # For every row of rr take the value in the column with the highest
        # likelihood (the comprehension's `i` is local to the comprehension and
        # does not clobber the outer loop index).
        frr = np.squeeze(np.array([rr[i,index[i]] for i in range(rr.shape[0])]))
        rrs.append(frr)
    data_day['rr'] = rrs
    # Stack rr, likelihood and activity side by side: one array per entry with
    # columns [rr, likelihood, activity].
    data_day['rr_col'] = data_day.apply(lambda a: np.vstack([np.squeeze(a['rr']),np.squeeze(a['likelihood']),np.squeeze(a['activity'])]).T,
                     axis=1)
    return data_day

def remove_3sd(heart_rate_window):
    """
    Drop rows whose first column lies 3 or more weighted standard deviations
    from the weighted mean.

    `heart_rate_window` is a 2-D array; column 0 holds the values and column 1
    the weights. The weighted mean and standard deviation are computed only
    from rows whose weight exceeds .25. Filtering is best effort: if the
    statistics cannot be computed the window is left unfiltered.

    Returns a two-element list:
      [filtered_window, 'Available']           when more than 10 rows remain,
      [first 10 rows of a copy of the input, 'Not Available']  otherwise.
    """
    original = deepcopy(heart_rate_window)
    try:
        confident = heart_rate_window[:, 1] > .25
        # Without any confident row the weighted statistics are undefined
        # (the weights would sum to zero), so the window stays as it is.
        if confident.any():
            r, tt = weighted_avg_and_std(heart_rate_window[confident, 0],
                                         heart_rate_window[confident, 1])
            values = heart_rate_window[:, 0]
            within = (values < r + 3 * tt) & (values > r - 3 * tt)
            heart_rate_window = heart_rate_window[within]
    except Exception:
        # Deliberate best effort: malformed input leaves the window unfiltered.
        # Unlike a bare `except:` this no longer swallows KeyboardInterrupt or
        # SystemExit.
        pass
    if heart_rate_window.shape[0] > 10:
        return [heart_rate_window, 'Available']
    return [original[:10], 'Not Available']

    
def parse_for_features(data_day):
    """
    Filter the per-window RR data, compute features and predict stress.

    Expects the frame produced by parse_day_data, whose 'rr_col' arrays have
    the columns [rr, likelihood, activity]. Adds, among others, the columns
    'rr_features', 'rr_weighted_features', 'quality_features', 'quality_mag',
    'stress_likelihood_ppg_qual' (features normalised with quality weights)
    and 'stress_likelihood_ppg' (features normalised with uniform weights),
    and returns the filtered frame.

    The classifier is read from '../models/stress_model_weighted_2.p'.
    NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    """
    # Keep rows with likelihood > .05, rr inside (300, 1500) and activity < .2;
    # only the first two columns (rr, likelihood) are retained.
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a:a[np.where((a[:,1]>.05)&(a[:,0]>300)&(a[:,0]<1500)&(a[:,2]<.2))[0],:2])
    # remove_3sd returns [window, indicator]; both parts are unpacked below.
    data_day['rr_col'] = data_day['rr_col'].apply(remove_3sd)
    data_day['length1'] = data_day['rr_col'].apply(lambda a:a[0].shape[0])
    data_day = data_day[data_day.length1>40]
    data_day['indicator'] = data_day['rr_col'].apply(lambda a:a[1])
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a:a[0])
    data_day['likelihood'] = data_day['rr_col'].apply(lambda a:a[:,1])
    data_day['rr'] = data_day['rr_col'].apply(lambda a:a[:,0])

    # Day-level normalisation: centre on the 70th percentile of the window
    # means and scale by the 30th percentile of the window standard deviations.
    data_day['mean'] = data_day['rr'].apply(lambda a:np.mean(a))
    data_day['std'] = data_day['rr'].apply(lambda a:np.std(a))
    m = np.percentile(data_day['mean'],70)
    s = np.percentile(data_day['std'],30)
    data_day['rr'] = data_day['rr'].apply(lambda a:(np.array(a)-m)/s)
    data_day['rr_col1'] = data_day.apply(lambda a:np.vstack([list(a['rr']),list(a['likelihood'])]).T,axis=1)

    data_day['rr_features'] = data_day['rr'].apply(get_rr_features)
    data_day['rr_weighted_features'] = data_day['rr_col1'].apply(get_weighted_rr_features)
    # Context manager so the model file handle is closed deterministically.
    with open('../models/stress_model_weighted_2.p','rb') as model_file:
        stress_model = pickle.load(model_file)
    data_day['quality_features'] = data_day['likelihood'].apply(get_quality_features)
    # Mean of the four quality features, used as the per-window weight.
    data_day['quality_mag'] = data_day['quality_features'].apply(lambda a:np.sum(a)/len(a))

    # normalize_daywise works in place, so the matrix is rebuilt from the
    # column before each of the two normalisations.
    feature_matrix = np.array(list(data_day['rr_weighted_features'].values))
    quals1 = np.array(list(data_day['quality_mag'].values))
    feature_matrix = normalize_daywise(feature_matrix,quals1)
    stress_likelihood = stress_model.predict_proba(feature_matrix)[:,1]
    data_day['stress_likelihood_ppg_qual'] = stress_likelihood

    feature_matrix = np.array(list(data_day['rr_weighted_features'].values))
    feature_matrix = normalize_daywise(feature_matrix,[1]*len(quals1))
    stress_likelihood = stress_model.predict_proba(feature_matrix)[:,1]
    data_day['stress_likelihood_ppg'] = stress_likelihood
    return data_day

def normalize_daywise(feature_matrix,quals1):
    """
    Standardise every column of `feature_matrix` with weighted statistics.

    Each column has its weighted mean subtracted and is divided by its
    weighted standard deviation, using `quals1` as one weight per row. The
    matrix is modified in place and also returned.
    """
    n_features = feature_matrix.shape[1]
    for col in range(n_features):
        column = feature_matrix[:, col]
        centre, spread = weighted_avg_and_std(column, quals1)
        feature_matrix[:, col] = (column - centre) / spread
    return feature_matrix


# def get_stress(data_day):
#     stress_model = pickle.load(open('../models/stress_model_weighted.p','rb'))
#     feature_matrix = np.array(list(data_day['rr_weighted_features'].values))
#     quals1 = np.array(list(data_day['quality_mag'].values))
#     feature_matrix = normalize_daywise(feature_matrix,quals1)
# #     print(feature_matrix.shape)
#     stress_likelihood = stress_model.predict_proba(feature_matrix)[:,1]
#     data_day['stress_likelihood1'] = stress_likelihood
    
#     stress_model = pickle.load(open('../models/stress_model_weighted.p','rb'))
#     feature_matrix = np.array(list(data_day['rr_weighted_features'].values))
#     feature_matrix = normalize_daywise(feature_matrix,[1]*len(quals1))
# #     print(feature_matrix.shape)
#     stress_likelihood = stress_model.predict_proba(feature_matrix)[:,1]
#     data_day['stress_likelihood2'] = stress_likelihood
#     return data_day

# def get_corr(data_day1):
#     if data_day1.shape[0]<60:
#         return np.zeros((0,7))
#     data_day1['quality_mag_1'] = data_day1['quality_mag'].apply(lambda a:np.round(100*a)/100)
#     all_corr = []
#     for q in np.unique(data_day1['quality_mag_1'].values):
#         tmp = data_day1[data_day1.quality_mag_1>=q]
#         tmp2 = tmp[['stress_likelihood_ecg','stress_likelihood','stress_likelihood1','stress_likelihood2']].dropna()
#         if tmp2.shape[0]<20:
#             continue
#         feature = np.array([q,
#                    r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood'].values),
#                    r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood1'].values),
#                    r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood2'].values),
#                    r2_score(tmp2['stress_likelihood'].values,tmp2['stress_likelihood1'].values),
#                    tmp['stress_likelihood1'].dropna().shape[0],
#                    data_day1['stress_likelihood_ecg'].dropna().shape[0]])
#         all_corr.append(feature)
#     return np.array(all_corr)


def parse_day_data_ecg(data_day):
    """
    Compute ECG-based stress likelihoods for one day of data.

    Uses the columns 'ecg_rr_array', 'index', 'ltime' and 'window'; rows with
    a missing value in any of them, or with 20 or fewer ECG RR values, are
    dropped. Returns the remaining rows with the added columns 'count_ecg',
    'mean', 'std', 'ecg_rr_array_final', 'ecg_features' and
    'stress_likelihood_ecg'.

    The classifier is read from '../models/stress_model_ecg_2.p'.
    NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    """
    data_day = data_day[['ecg_rr_array','index','ltime','window']].dropna()
    data_day['count_ecg'] = data_day['ecg_rr_array'].apply(len)
    data_day = data_day[data_day.count_ecg>20]
    # Day-level normalisation: centre on the 70th percentile of the window
    # means and scale by the 30th percentile of the window standard deviations.
    data_day['mean'] = data_day['ecg_rr_array'].apply(lambda a:np.mean(a))
    data_day['std'] = data_day['ecg_rr_array'].apply(lambda a:np.std(a))
    m = np.percentile(data_day['mean'],70)
    s = np.percentile(data_day['std'],30)
    data_day['ecg_rr_array_final'] = data_day['ecg_rr_array'].apply(lambda a:(np.array(a)-m)/s)
    data_day['ecg_features'] = data_day['ecg_rr_array_final'].apply(get_rr_features)
    X = np.array(list(data_day['ecg_features']))
    # The scaler is fitted on this day's feature matrix only.
    X = StandardScaler().fit_transform(X)
    # Context manager so the model file handle is closed deterministically.
    with open('../models/stress_model_ecg_2.p','rb') as model_file:
        clf = pickle.load(model_file)
    y_pred = clf.predict_proba(X)[:,1]
    data_day['stress_likelihood_ecg'] = list(y_pred)
    return data_day

def parse_each_day_ppg_ecg(a):
    """
    Run the PPG and ECG stress pipelines on one (user, day) frame.

    Returns a frame restricted to the fixed `columns` list below:
      * empty when the PPG pipeline yields fewer than 60 rows or when any step
        raises (the exception is printed, not propagated);
      * with 'stress_likelihood_ecg' set to NaN when fewer than 60 ECG rows are
        available (including days with fewer than 120 non-null ECG arrays);
      * otherwise the PPG rows left-joined with the ECG likelihood on 'window'.

    The input frame is modified: an 'index' column is added before processing.
    """
    columns = ['window', 'ltime', 'likelihood_max_array', 'activity', 'rr_array',
       'time', 'timestamp', 'likelihood_mean', 'localtime', 'ecg_rr_array',
       'day', 'version', 'user', 'quality_features', 'activity_features',
       'index', 'likelihood', 'likelihood_ind', 'length', 'rr', 'rr_col',
       'length1', 'indicator', 'mean', 'std', 'rr_features',
       'rr_weighted_features', 'quality_mag', 'stress_likelihood_ppg_qual','stress_likelihood_ecg',
       'stress_likelihood_ppg']
    try:
        ecg_columns = ['window', 'stress_likelihood_ecg']
        a['index'] = a.index.values
        # Previously stored likelihoods are discarded and recomputed below.
        a = a.drop(['stress_likelihood', 'stress_likelihood_ecg'],axis=1)
        # Default: no ECG result; replaced only when enough ECG data exists.
        a_ecg = pd.DataFrame([],columns=ecg_columns)
        if a['ecg_rr_array'].dropna().shape[0]>=120:
            # Deep copy so the ECG pipeline cannot alter the frame used for PPG.
            a_ecg = parse_day_data_ecg(deepcopy(a))[ecg_columns]
        a_ppg = parse_day_data(a)
        a_ppg = parse_for_features(a_ppg)
        if a_ppg.shape[0]<60:
            return pd.DataFrame([],columns=columns)
        if a_ecg.shape[0]<60:
            a_ppg['stress_likelihood_ecg'] = np.nan
            return a_ppg[columns]
        a_ppg = pd.merge(a_ppg, a_ecg, how='left', left_on=['window'], right_on=['window'])
        # Debug visualisation (disabled):
        # plt.figure(figsize=(16,8))
        # plt.plot(a_ppg['ltime'],a_ppg['stress_likelihood_ecg'],'*')
        # plt.plot(a_ppg['ltime'],a_ppg['stress_likelihood_ppg'],'o')
        # plt.plot(a_ppg['ltime'],a_ppg['stress_likelihood_ppg_qual'],'s')
        # plt.show()
        return a_ppg[columns]
    except Exception as e:
        # Per-day boundary: one failing day must not abort the whole run.
        print(e)
        return pd.DataFrame([],columns=columns)

# def parse_each_day(a):
#     columns = a.columns
#     ecg_len = a['stress_likelihood_ecg'].dropna().shape[0]
#     a = parse_day_data(a)
#     a = parse_for_features(a)
#     if a.shape[0]<200:
#         return np.zeros((0,7)),np.zeros((0,4)),np.zeros((0,3)),a
#     try:
#         a = get_stress(a)
#     except:
#         return np.zeros((0,7)),np.zeros((0,4)),np.zeros((0,3)),a
#     a['quality_mag_1'] = a['quality_mag'].apply(lambda a:np.round(100*a)/100)
#     ppg_len = a['stress_likelihood1'].dropna().shape[0]
#     a1 = a[['quality_mag_1','stress_likelihood1']].dropna()
#     ff = []
#     for q in np.unique(a1['quality_mag_1'].values):
#         ff.append(np.array([q,a1[a1.quality_mag_1==q].shape[0],ecg_len,ppg_len]))
#     all_corr = get_corr(a)
#     if all_corr.shape[0]==0:
#         return np.zeros((0,7)),np.array(ff).reshape(-1,4),np.zeros((0,3)),a
#     tmp2 = a[['stress_likelihood_ecg','stress_likelihood','stress_likelihood1','stress_likelihood2']].dropna()
#     try:
#         tmp = np.array([r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood'].values),
#                        r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood1'].values),
#                        r2_score(tmp2['stress_likelihood_ecg'].values,tmp2['stress_likelihood2'].values)])
#     except:
#         return np.zeros((0,7)),np.array(ff).reshape(-1,4),np.zeros((0,3)),a
#     if len(tmp[~np.isnan(tmp)])<3:
#         return np.zeros((0,7)),np.array(ff).reshape(-1,4),np.zeros((0,3)),a
#     else:
#         return all_corr,np.array(ff).reshape(-1,4),tmp.reshape(-1,3),a

def parse_each_participant(directory,d):
    """
    Process one participant file and write the result to `directory1`.

    Loads the pickled frame at `directory + d`, separates the EMA columns,
    splits the rest into (user, day) frames, runs parse_each_day_ppg_ecg on
    each of them in parallel and pickles `[final_output, ema]` to
    `directory1 + d` (`directory1` is a module-level global). Nothing is
    written when no day qualifies or every day produces an empty frame.

    Always returns 0.
    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    """
    # Context managers close the handles deterministically; the write handle
    # in particular must be closed for the output to be flushed to disk.
    with open(directory+d,'rb') as source:
        data = pickle.load(source).reset_index(drop=True)
    ema = data[['user','day','window','time','ltime','all_scores','score','label']]
    data = data.drop(['all_scores','score','label'],axis=1)
    data_all = get_daywise(data)
    if not data_all:
        return 0
    final_output = Parallel(n_jobs=25,verbose=4)(delayed(parse_each_day_ppg_ecg)(a) for a in data_all)
#     final_output = [parse_each_day_ppg_ecg(a) for a in data_all]
    final_output = [a for a in final_output if a.shape[0]>0]
    if not final_output:
        return 0
    final_output = pd.concat(final_output)
    with open(directory1+d,'wb') as sink:
        pickle.dump([final_output,ema],sink)
    return 0
# Input directory with one pickled frame per participant, and the output
# directory that parse_each_participant reads as a global when writing results.
directory = '../../cc3/rice_data/ecg_ppg_25_left3/'
directory1 = '../../cc3/rice_data/ecg_ppg_25_left4/'
# all_data = Parallel(n_jobs=30,verbose=2)(delayed(parse_each_participant)(directory,d) for d in os.listdir(directory)[:2] if d[-1]=='p')
# Process every file whose name ends in 'p', one participant at a time.
# parse_each_participant always returns 0, so all_data is a list of zeros; the
# real output is the set of pickles written to directory1.
all_data = [parse_each_participant(directory,d) for d in os.listdir(directory) if d[-1]=='p']

[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.3s remaining:   21.7s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.2s remaining:    6.3s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.8s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.2s finished
[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   5 | elapsed:    2.7s remaining:    4.0s
[Parallel(n_jobs=25)]: Done   5 out of   5 | elapsed:    4.1s finished
[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    3.7s remaining:    8.2s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    4.1s remaining:    2.6s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    6.1s finished
[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent wo

In [15]:
import pickle
# Load one processed participant file; close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open('../../cc3/rice_data/ecg_ppg_25_left4/35109a64-411d-4768-9602-c0a3d519a088.p','rb') as _participant_file:
    df = pickle.load(_participant_file)

In [19]:
# First element of the loaded list; drop rows containing any missing value and
# show at most the first 400 of the remaining rows.
df[0].dropna()[:400]

Unnamed: 0,window,ltime,likelihood_max_array,activity,rr_array,time,timestamp,likelihood_mean,localtime,ecg_rr_array,...,length1,indicator,mean,std,rr_features,rr_weighted_features,quality_mag,stress_likelihood_ppg_qual,stress_likelihood_ecg,stress_likelihood_ppg
18,"(2019-06-28 15:22:00, 2019-06-28 15:23:00)",2019-06-28 14:22:00.006070,"[[0.04333333333333334, 0.03, 1.0], [0.21, 0.05...","[0.045964228741619095, 0.061495123743419804, 0...","[[570.6134094151213, 1033.5917312661497, 921.6...",1.561750e+09,2019-06-28 20:22:00.006070,0.750833,2019-06-28 14:22:00.006070,"[954.0, 885.0, 920.0, 841.0, 702.0, 848.0, 101...",...,41,Available,925.881191,125.516335,"[0.5453590841082013, 0.6560097506479152, 1.163...","[0.333299759369898, 0.6529408464304356, 1.0429...",0.626628,0.207586,0.024561,0.288264
19,"(2019-06-28 15:43:00, 2019-06-28 15:44:00)",2019-06-28 14:43:21.028308,"[[0.17666666666666667, 0.145, 0.43], [0.02, 0....","[0.09413990066414674, 0.14873875180443388, 0.0...","[[782.7788649706457, 769.2307692307693, 757.57...",1.561751e+09,2019-06-28 20:43:01.026891,0.873333,2019-06-28 14:43:01.026891,"[600.0, 552.0, 545.0, 780.0, 890.0, 699.0, 763...",...,43,Available,840.826074,74.821932,"[0.19379380472780097, 0.44652549644433787, 0.6...","[0.12031233676585673, 0.325462273577487, 0.747...",0.695723,0.100329,0.055844,0.140739
30,"(2019-06-28 15:52:00, 2019-06-28 15:53:00)",2019-06-28 14:52:16.009248,"[[0.613, 0.6503333333333333, 0.015], [0.0, 0.5...","[0.048581771752591874, 0.05749034777197076, 0....","[[572.2460658082975, 508.2592121982211, 863.93...",1.561752e+09,2019-06-28 20:52:00.030231,0.731667,2019-06-28 14:52:00.030231,"[1070.0, 706.0, 948.0, 790.0, 786.0, 894.0, 86...",...,60,Available,817.802821,160.904659,"[0.8962296102965366, 1.18203112204518, 0.52774...","[0.7629029306539412, 1.0114319850541953, 0.542...",0.803320,0.043324,0.060463,0.048456
44,"(2019-06-28 20:41:00, 2019-06-28 20:42:00)",2019-06-28 19:41:24.024285,"[[0.04, 0.03, 0.45500000000000007], [0.02, 0.0...","[0.007806260633017041, 0.005628471651700564, 0...","[[0.0, 653.5947712418301, 667.7796327212019], ...",1.561769e+09,2019-06-29 01:41:00.031095,0.736250,2019-06-28 19:41:00.031095,"[764.0, 667.0, 756.0, 646.0, 717.0, 677.0, 632...",...,45,Available,732.186108,61.168613,"[0.1295207407069287, 0.4352199471725505, 0.024...","[0.10112060895180172, 0.4054624037419985, 0.02...",0.622758,0.080355,0.443577,0.091957
47,"(2019-06-28 20:48:00, 2019-06-28 20:49:00)",2019-06-28 19:48:48.035556,"[[0.076, 0.26066666666666666, 0.03], [0.0525, ...","[0.13138716637157097, 0.011763992680630693, 0....","[[997.5062344139651, 913.2420091324201, 503.14...",1.561769e+09,2019-06-29 01:48:01.010035,0.357583,2019-06-28 19:48:01.010035,"[809.0, 850.0, 631.0, 745.0, 584.0, 755.0, 705...",...,42,Available,700.869657,242.381049,"[2.0336648016183245, 2.4911749975697264, -0.16...","[1.5172561554804191, 1.451595166197946, -0.284...",0.369949,0.169668,0.427627,0.152469
52,"(2019-06-28 21:16:00, 2019-06-28 21:17:00)",2019-06-28 20:16:01.031388,"[[0.14833333333333334, 0.12416666666666666, 0....","[0.03876936001941769, 0.016198199261513224, 0....","[[600.6006006006006, 594.3536404160475, 760.45...",1.561771e+09,2019-06-29 02:16:00.029059,0.860000,2019-06-28 20:16:00.029059,"[1426.0, 1286.0, 832.0, 632.0, 767.0, 883.0, 7...",...,52,Available,805.385411,87.433114,"[0.2646268729663976, 0.5942334457645401, 0.454...","[0.20260028397050034, 0.4487850017126729, 0.42...",0.797500,0.060517,0.024125,0.070992
69,"(2019-06-28 20:51:00, 2019-06-28 20:52:00)",2019-06-28 19:51:12.005985,"[[0.035, 0.03, 0.5608333333333333], [0.0, 0.0,...","[0.008115978024872017, 0.08279300354971632, 0....","[[0.0, 746.2686567164178, 750.4690431519699], ...",1.561769e+09,2019-06-29 01:51:00.036926,0.491667,2019-06-28 19:51:00.036926,"[798.0, 658.0, 654.0, 728.0, 787.0, 660.0, 725...",...,52,Available,745.485325,177.504781,"[1.0906920922457457, 0.8797948082499669, 0.102...","[0.6097376039961249, 0.4949111932916105, 0.159...",0.502531,0.046246,0.429843,0.045561
85,"(2019-06-28 15:59:00, 2019-06-28 16:00:00)",2019-06-28 14:59:07.023908,"[[0.05333333333333333, 0.08333333333333331, 0....","[0.30757565910541534, 0.036155018380249454, 0....","[[607.90273556231, 760.4562737642586, 0.0], [0...",1.561752e+09,2019-06-28 20:59:00.004741,0.467500,2019-06-28 14:59:00.004741,"[900.0, 906.0, 869.0, 955.0, 754.0, 851.0, 858...",...,44,Available,793.702836,205.704419,"[1.464769526595311, 1.7922785721192505, 0.3859...","[0.9872704978820249, 1.0462724561284449, 0.526...",0.463851,0.045515,0.045417,0.048798
96,"(2019-06-28 20:44:00, 2019-06-28 20:45:00)",2019-06-28 19:44:09.007559,"[[0.027999999999999997, 0.0925, 0.97], [0.0366...","[0.01465930379329028, 0.022077680691286433, 0....","[[625.9780907668231, 636.9426751592357, 710.47...",1.561769e+09,2019-06-29 01:44:00.020144,0.388333,2019-06-28 19:44:00.020144,"[727.0, 608.0, 616.0, 737.0, 717.0, 819.0, 746...",...,41,Available,752.528674,171.104036,"[1.013450540828317, 1.1483251255533369, 0.1437...","[0.617185314340192, 0.5794553592624409, 0.2240...",0.405239,0.047685,0.783383,0.047531
122,"(2019-06-28 21:19:00, 2019-06-28 21:20:00)",2019-06-28 20:19:15.002202,"[[0.075, 0.10300000000000001, 0.5725], [0.0385...","[0.014473138198576478, 0.03261656480183868, 0....","[[547.1956224350205, 577.2005772005773, 732.60...",1.561771e+09,2019-06-29 02:19:00.007352,0.658095,2019-06-28 20:19:00.007352,"[960.0, 839.0, 705.0, 863.0, 807.0, 913.0, 800...",...,46,Available,800.857515,109.106242,"[0.41207972719424746, 0.6572492421120939, 0.42...","[0.21539909068786867, 0.5244517235266903, 0.49...",0.581331,0.060847,0.050864,0.074344


In [None]:
# NOTE(review): this cell indexes every element of all_data (a[0], a[1], a[2]),
# but parse_each_participant as defined in this notebook always returns 0, so
# all_data is a list of integers and this cell cannot run against it. It
# presumably targets an earlier version that returned per-participant arrays
# (compare the commented-out parse_each_day) -- confirm before reusing.
data1 = np.concatenate([a[0] for a in all_data])
yld = np.concatenate([a[1] for a in all_data])
# Split the second result: first two columns into yld1, the rest stays in yld.
yld1 = yld[:,:2]
yld = yld[:,2:]
day_corr = np.concatenate([a[2] for a in all_data])

In [None]:
# Box plot of the columns of yld, one box per column.
# The two tick labels below require yld to have exactly two columns.
# NOTE(review): presumably column 0 is ECG yield and column 1 PPG yield, in
# minutes -- confirm against the code that builds yld.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':25})
plt.figure(figsize=(16,8))
plt.boxplot(yld)
plt.ylabel('Minutes')
plt.xticks(range(1,yld.shape[1]+1),['ECG YIELD','PPG YIELD'])
plt.title('Stress yield across all participant days')
plt.show()

In [None]:
print(day_corr.shape)
# Drop every row of day_corr that contains at least one NaN.
day_corr = day_corr[~np.isnan(day_corr).any(axis=1)]
# Totals over all_data: element 4 of each entry is summed as participant days
# and element 3 as users.
# NOTE(review): relies on all_data entries being indexable, which does not hold
# for the list of zeros produced by the current parse_each_participant.
print(np.sum([a[4] for a in all_data]),'- Participant Days,',np.sum([a[3] for a in all_data]),'- Users')

In [None]:
# Box plot of the first three columns of day_corr (NaN rows removed), with the
# y-axis clipped to [-1, 1].
# The tick positions are derived from day_corr.shape[1] while three labels are
# given, so this only works when day_corr has exactly three columns.
# NOTE(review): the y-label says Pearson correlation, but the commented-out
# parse_each_day computed these values with r2_score -- confirm the label.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':20})
plt.figure(figsize=(16,8))
plt.boxplot(day_corr[~np.isnan(day_corr).any(axis=1)][:,np.array([0,1,2])])
plt.ylim([-1,1])
plt.ylabel('Pearson Correlation')
plt.xticks(range(1,day_corr.shape[1]+1),['Original cStress','cStress with Weighted Features and weighted normalization','cStress with Weighted Features'],rotation=10)
plt.title('Correlation with ECG For Different Modes of Normalization')
plt.show()

In [None]:
# Box plot of the first three columns of day_corr (NaN rows removed), without
# any y-axis limit.
# The tick positions are derived from day_corr.shape[1] while three labels are
# given, so this only works when day_corr has exactly three columns.
# NOTE(review): the y-label says Pearson correlation, but the commented-out
# parse_each_day computed these values with r2_score -- confirm the label.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':20})
plt.figure(figsize=(16,8))
plt.boxplot(day_corr[~np.isnan(day_corr).any(axis=1)][:,np.array([0,1,2])])
plt.ylabel('Pearson Correlation')
plt.xticks(range(1,day_corr.shape[1]+1),['Original cStress','cStress with Weighted Features and weighted normalization','cStress with Weighted Features'],rotation=10)
plt.title('Correlation with ECG For Different Modes of Normalization')
plt.show()

In [None]:
data_all = pd.DataFrame(data1,columns=['quality','corr_orig','corr_new','corr_new1','corr_between','ppg_yield','ecg_yield'])
data_all1 = pd.DataFrame(yld1,columns=['quality','ppg_yield'])

corr_25 = data_all.groupby('quality').quantile(.5)
x = corr_25.index.values
x1 = np.unique(data_all1['quality'].values)
y = []
for a in x1:
    y.append(data_all1[data_all1.quality>=a]['ppg_yield'].sum()/60/np.sum([a[4] for a in all_data]))

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams.update({'font.size':20})
fig, ax1 = plt.subplots(figsize=(20,12))
ax2 = ax1.twinx()
ax1.plot(x,corr_25['corr_orig'].loc[x],label='Original Cstress')
ax1.plot(x,corr_25['corr_new'].loc[x],label='Weighted Normalization with weighted features')
ax1.plot(x,corr_25['corr_new1'].loc[x],label='Original Cstress using Weighted Features')
# ax1.plot(x,corr_25['corr_between'].loc[x],label='Original Normalization using auto Features')
ax2.plot(x1,y,label='PPG Yield')
ax1.grid()
# ax1.plot(x,corr_75['corr_orig'].loc[x],label='Original 75th')
# ax1.plot(x,corr_75['corr_new'].loc[x],label='Weighted 75th')
ax1.legend(fontsize=20)
ax1.set_xlabel('Quality Metric')
ax2.set_ylabel('Median Hours per Participant Day', color='g')
ax1.set_ylabel('Median Correlation Across all Participant Days', color='b')
plt.show()
#  plt.figure(figsize=(16,8))

In [None]:
data_all = pd.DataFrame(data1,columns=['quality','corr_orig','corr_new','corr_new1','corr_between','ppg_yield','ecg_yield'])
data_all1 = pd.DataFrame(yld1,columns=['quality','ppg_yield'])

corr_25 = data_all.groupby('quality').quantile(.5)
x = corr_25.index.values
x1 = np.unique(data_all1['quality'].values)
y = []
for a in x1:
    y.append(data_all1[data_all1.quality>=a]['ppg_yield'].sum()/60/np.sum([a[4] for a in all_data]))

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams.update({'font.size':20})
fig, ax1 = plt.subplots(figsize=(20,12))
ax2 = ax1.twinx()
ax1.plot(x,corr_25['corr_orig'].loc[x],label='Original Cstress')
ax1.plot(x,corr_25['corr_new'].loc[x],label='Weighted Normalization with weighted features')
ax1.plot(x,corr_25['corr_new1'].loc[x],label='Original Cstress using Weighted Features')
# ax1.plot(x,corr_25['corr_between'].loc[x],label='Original Normalization using auto Features')
ax2.plot(x1,y,label='PPG Yield')
ax1.grid()
# ax1.plot(x,corr_75['corr_orig'].loc[x],label='Original 75th')
# ax1.plot(x,corr_75['corr_new'].loc[x],label='Weighted 75th')
ax1.legend(fontsize=20)
ax1.set_xlabel('Quality Metric')
ax2.set_ylabel('Median Hours per Participant Day', color='g')
ax1.set_ylabel('Median Correlation Across all Participant Days', color='b')
plt.show()


# plt.figure(figsize=(16,8))



In [None]:
# Box plot of 'corr_new' grouped by 'quality', outliers shown, drawn on a
# pre-sized axis with the y-range limited to [-3, 1].
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(figsize=(16,8))
# Set the figure-level title to an empty string.
plt.suptitle('')
c = data_all.boxplot(column=['corr_new'], by='quality', ax=ax,showfliers=True)
plt.ylim([-3,1])
plt.xticks(rotation=100)

In [None]:
# Box plot of 'ppg_yield' grouped by 'quality' on a pre-sized axis.
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(figsize=(16,8))
# Set the figure-level title to an empty string.
plt.suptitle('')
c = data_all.boxplot(column=['ppg_yield'], by='quality', ax=ax)

In [None]:
# 25th and 75th percentile of every column per quality level; the result is
# indexed by (quality, quantile) and this selects quality 0.2 at quantile 0.25.
data_all.groupby('quality').quantile([.25,.75]).loc[(0.2, 0.25)]

In [None]:
import sklearn

In [None]:
# Print scikit-learn's version/debugging information for the current environment.
sklearn.show_versions()

In [None]:
# Display the 'quality' column of the yield frame.
data_all1['quality']

In [None]:
import pickle
# Load the ECG stress model for inspection; the handle is closed on exit.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open('../models/stress_model_ecg_2.p','rb') as _model_file:
    ecg_model = pickle.load(_model_file)
# Bare expression as the last statement so the notebook still displays the model.
ecg_model