In [5]:
from scipy.stats import iqr,skew,kurtosis
from datetime import datetime
from copy import deepcopy
import math
from scipy.stats import pearsonr
from sklearn.externals.joblib import Parallel,delayed
import warnings
import pandas as pd
import pickle
import os
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')

def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted standard deviation.

    values, weights -- Numpy ndarrays with the same shape.

    Returns a (mean, std) tuple; std is the square root of the weighted
    mean of the squared deviations from the weighted mean.
    """
    mean = np.average(values, weights=weights)
    squared_deviation = (values - mean) ** 2
    return mean, math.sqrt(np.average(squared_deviation, weights=weights))

def get_rr_features(a):
    """
    Summarise `a` with seven statistics, returned as one array in this order:
    variance, interquartile range, mean, median, 80th percentile,
    20th percentile, and 60000 divided by the median.

    NOTE(review): the last entry reads as a heart rate in beats per minute,
    assuming `a` holds RR intervals in milliseconds -- confirm with the caller.
    """
    median_rr = np.median(a)
    return np.array([
        np.var(a),
        iqr(a),
        np.mean(a),
        median_rr,
        np.percentile(a, 80),
        np.percentile(a, 20),
        60000 / median_rr,
    ])


def get_weighted_rr_features(a):
    """
    Weighted variant of the seven RR statistics.

    `a` is indexed as a 2-D array: column 0 holds the values and column 1
    their weights. Each value is repeated round(100 * weight) times and the
    statistics (variance, IQR, mean, median, 80th percentile, 20th percentile,
    60000 / median) are computed on the expanded sample.
    """
    repeats = np.int64(np.round(100 * a[:, 1]))
    expanded = np.repeat(a[:, 0], repeats)
    median_rr = np.median(expanded)
    return np.array([
        np.var(expanded),
        iqr(expanded),
        np.mean(expanded),
        median_rr,
        np.percentile(expanded, 80),
        np.percentile(expanded, 20),
        60000 / median_rr,
    ])


def get_quality_features(a):
    """
    Describe an array of quality values with four numbers: its median, its
    mean, the count of entries above 0.2 divided by 60, and the count of
    entries above 0.6 divided by 60.
    """
    median_quality = np.percentile(a, 50)
    mean_quality = np.mean(a)
    above_low_per_60 = len(a[a > .2]) / 60
    above_high_per_60 = len(a[a > .6]) / 60
    return np.array([median_quality, mean_quality, above_low_per_60, above_high_per_60])

def get_daywise(data):
    """
    Split `data` into one frame per (user, day) pair and keep only the frames
    that have more than 60 rows in which both 'likelihood_max_array' and
    'rr_array' are non-null.
    """
    required = ['likelihood_max_array','rr_array']
    kept = []
    for _, day_frame in data.groupby(['user','day'],as_index=False):
        if day_frame[required].dropna().shape[0] > 60:
            kept.append(day_frame)
    return kept

def parse_day_data(data_day):
    """
    For every row, reshape the likelihood and RR arrays to three columns,
    pick per array-row the RR value from the column with the highest
    likelihood, and pack it with that likelihood and the activity.

    The columns 'likelihood_max_array', 'likelihood', 'likelihood_ind',
    'rr_array' and 'length' are written onto the frame that was passed in.
    The returned frame is the subset of rows whose RR array has more than
    20 rows, extended with 'time', 'rr' and 'rr_col' (an (n, 3) array of
    rr / likelihood / activity).
    """
    def _as_triplets(raw):
        # One array-row per entry, three candidate values per array-row.
        return np.squeeze(raw).reshape(-1,3)

    data_day['likelihood_max_array'] = data_day['likelihood_max_array'].apply(_as_triplets)
    data_day['likelihood'] = data_day['likelihood_max_array'].apply(lambda lik:np.max(lik,axis=1))
    data_day['likelihood_ind'] = data_day['likelihood_max_array'].apply(lambda lik:np.argmax(lik,axis=1))
    data_day['rr_array'] = data_day['rr_array'].apply(_as_triplets)
    data_day['length'] = data_day['rr_array'].apply(lambda beats:beats.shape[0])
    data_day = data_day[data_day['length']>20]
    data_day['time'] = data_day['ltime'].apply(lambda stamp:datetime.timestamp(stamp))

    # Walk the RR arrays together with the matching arg-max indices and keep,
    # per array-row, the value from the most likely column.
    selected = []
    for beats,best in zip(data_day['rr_array'].values,data_day['likelihood_ind'].values):
        picked = [beats[row,best[row]] for row in range(beats.shape[0])]
        selected.append(np.squeeze(np.array(picked)))
    data_day['rr'] = selected

    def _stack_row(row):
        # Columns of the result: rr, likelihood, activity.
        parts = [np.squeeze(row['rr']),np.squeeze(row['likelihood']),np.squeeze(row['activity'])]
        return np.vstack(parts).T

    data_day['rr_col'] = data_day.apply(_stack_row,axis=1)
    return data_day

def remove_3sd(heart_rate_window):
    """
    Drop rows whose value lies 3 or more weighted standard deviations away
    from the weighted mean.

    heart_rate_window -- 2-D ndarray; column 0 holds the values and column 1
    their weights. Only rows with weight > .25 contribute to the weighted
    mean and standard deviation, but the filter is applied to every row.

    Returns [window, status]:
      * [filtered rows, 'Available'] when more than 10 rows remain;
      * [copy of the first 10 input rows, 'Not Available'] otherwise.
    When the statistics cannot be computed the window is left unfiltered
    (and, if it has more than 10 rows, the input object itself is returned).
    """
    filtered = heart_rate_window
    try:
        trusted = heart_rate_window[:,1]>.25
        center,spread = weighted_avg_and_std(heart_rate_window[trusted,0],heart_rate_window[trusted,1])
        inside = (heart_rate_window[:,0]<center+3*spread)&(heart_rate_window[:,0]>center-3*spread)
        filtered = heart_rate_window[inside]
    except (ZeroDivisionError, ValueError, IndexError):
        # Best effort: np.average raises ZeroDivisionError when no row passes
        # the .25 weight threshold (weights sum to zero); IndexError/ValueError
        # cover windows that are not (n, >=2) shaped. Anything else is a real
        # bug and is allowed to propagate instead of being silently swallowed.
        pass
    if filtered.shape[0]>10:
        return [filtered,'Available']
    # Copy only the fallback slice so the caller never aliases the input.
    return [deepcopy(heart_rate_window[:10]),'Not Available']

    
def parse_for_features(data_day):
    """
    Clean each row's 'rr_col' array and derive the per-row feature columns.

    Steps, in order:
      1. keep array-rows with column 1 > .05, 300 < column 0 < 1500 and
         column 2 < .2, and keep only columns 0 and 1;
      2. run remove_3sd on the result;
      3. keep frame rows whose cleaned array has more than 30 rows;
      4. add 'indicator', 'likelihood', 'rr', 'rr_col1', 'rr_features',
         'rr_weighted_features', 'quality_features' and 'quality_mag'.

    'rr_col' and 'length1' are written onto the frame that was passed in;
    the filtered frame is returned.
    """
    def _plausible(beats):
        keep = (beats[:,1]>.05)&(beats[:,0]>300)&(beats[:,0]<1500)&(beats[:,2]<.2)
        return beats[np.where(keep)[0],:2]

    data_day['rr_col'] = data_day['rr_col'].apply(_plausible)
    # remove_3sd returns [array, status]; both parts are unpacked below.
    data_day['rr_col'] = data_day['rr_col'].apply(lambda beats:remove_3sd(beats))
    data_day['length1'] = data_day['rr_col'].apply(lambda outcome:outcome[0].shape[0])
    data_day = data_day[data_day['length1']>30]
    data_day['indicator'] = data_day['rr_col'].apply(lambda outcome:outcome[1])
    data_day['rr_col'] = data_day['rr_col'].apply(lambda outcome:outcome[0])
    data_day['likelihood'] = data_day['rr_col'].apply(lambda beats:beats[:,1])
    data_day['rr'] = data_day['rr_col'].apply(lambda beats:beats[:,0])

    def _pair_up(row):
        # (n, 2) array: value in column 0, likelihood in column 1.
        return np.vstack([list(row['rr']),list(row['likelihood'])]).T

    data_day['rr_col1'] = data_day.apply(_pair_up,axis=1)
    data_day['rr_features'] = data_day['rr'].apply(lambda rr:get_rr_features(rr))
    data_day['rr_weighted_features'] = data_day['rr_col1'].apply(lambda pairs:get_weighted_rr_features(pairs))
    data_day['quality_features'] = data_day['likelihood'].apply(lambda lik:get_quality_features(lik))
    data_day['quality_mag'] = data_day['quality_features'].apply(lambda feats:np.sum(feats)/len(feats))
    return data_day

def normalize_daywise(feature_matrix,quals1):
    """
    Standardise every column of `feature_matrix` in place using the mean and
    standard deviation weighted by `quals1`, and return the same matrix.
    """
    n_features = feature_matrix.shape[1]
    for col in range(n_features):
        column = feature_matrix[:,col]
        center,spread = weighted_avg_and_std(column, quals1)
        feature_matrix[:,col] = (column - center)/spread
    return feature_matrix

def smooth(y, box_pts=10):
    """
    Moving-average filter: convolve `y` with a flat kernel of `box_pts`
    equal weights that sum to one, using numpy's 'same' mode.
    """
    kernel = np.ones(box_pts)/box_pts
    return np.convolve(y, kernel, mode='same')

def parse_day_data_ecg(data_day):
    """
    Build the ECG feature frame for one day.

    Keeps 'ecg_rr_array', 'ltime' and 'window', drops rows with a null in any
    of them, keeps rows whose ECG RR array has more than 20 entries, and adds
    'count_ecg', 'ecg_rr_array_final' (a copy of 'ecg_rr_array') and
    'ecg_features' (get_rr_features of the smoothed RR array).
    """
    ecg = data_day[['ecg_rr_array','ltime','window']].dropna()
    ecg['count_ecg'] = ecg['ecg_rr_array'].apply(lambda rr:len(rr))
    ecg = ecg[ecg['count_ecg']>20]
    ecg['ecg_rr_array_final'] = ecg['ecg_rr_array']
    ecg['ecg_features'] = ecg['ecg_rr_array_final'].apply(lambda rr:get_rr_features(smooth(rr)))
    return ecg

def parse_each_day_ppg_ecg(a):
    """
    Run the full per-day pipeline on one (user, day) frame.

    Drops the incoming 'stress_likelihood' and 'stress_likelihood_ecg'
    columns, derives ECG features (only when more than 60 rows carry an ECG
    RR array) and PPG features, left-merges the ECG features onto the PPG rows
    by 'window', and finally adds the ECG and PPG stress likelihoods.

    Returns an empty frame with the expected columns when no PPG row survives
    feature extraction or when fewer than 60 rows remain after the merge.
    """
    output_columns = ['window', 'ltime', 'likelihood_max_array', 'activity', 'rr_array',
       'time', 'timestamp', 'likelihood_mean', 'localtime', 'ecg_rr_array',
       'day', 'version', 'user', 'quality_features', 'activity_features', 'likelihood', 'likelihood_ind', 'length', 'rr', 'rr_col',
       'length1', 'indicator', 'rr_col1', 'rr_features',
       'rr_weighted_features', 'quality_mag', 'ecg_rr_array_final', 'ecg_features']
    ecg_columns = ['window', 'ecg_rr_array_final','ecg_features']

    day = a.drop(['stress_likelihood', 'stress_likelihood_ecg'],axis=1)

    if day['ecg_rr_array'].dropna().shape[0]>60:
        # The ECG branch works on its own copy because parse_day_data writes
        # columns onto the frame it receives.
        ecg_part = parse_day_data_ecg(deepcopy(day))[ecg_columns]
    else:
        ecg_part = pd.DataFrame([],columns=ecg_columns)

    ppg_part = parse_for_features(parse_day_data(day))
    if ppg_part.shape[0]==0:
        return pd.DataFrame([],columns=output_columns)

    # NOTE(review): a left merge repeats rows when 'window' is not unique
    # within the day -- confirm that windows are unique per (user, day).
    merged = pd.merge(ppg_part, ecg_part, how='left',left_on=['window'],right_on=['window'])
    if merged.shape[0]<60:
        return pd.DataFrame([],columns=output_columns)

    return get_ppg_stress(get_ecg_stress(merged))

def get_ppg_stress(a, model_path='../models/stress_ecg_final.p'):
    """
    Add a 'stress_likelihood_ppg' column computed from 'rr_weighted_features'.

    a          -- DataFrame with 'window', 'rr_weighted_features' (one feature
                  vector per row) and 'quality_mag' columns.
    model_path -- pickled classifier exposing predict_proba; column 1 of its
                  output is used as the likelihood.
                  NOTE(review): the default is the *ECG* model file, exactly
                  as before -- confirm that this is intended for PPG.

    Rows with a null in any of the three columns get NaN. Features are
    standardised per column with the quality-weighted mean/std of the baseline
    rows, i.e. rows whose feature 2 lies strictly between the day's 60th and
    99th percentile of that feature.

    Returns `a` itself with an all-NaN column when fewer than 10 rows are
    usable or when no baseline row exists; otherwise returns a new frame with
    a fresh 0..n-1 index, the same rows in the same order, and the new column
    appended.

    The likelihoods are written back by row position rather than by merging
    on 'window', so duplicated window values no longer multiply rows. An
    existing 'stress_likelihood_ppg' column is overwritten.
    """
    usable = a[['window','rr_weighted_features','quality_mag']].notna().all(axis=1).values
    if np.count_nonzero(usable)<10:
        a['stress_likelihood_ppg'] = np.nan
        return a

    quals1 = np.array(list(a['quality_mag'].values[usable]))
    feature_matrix = np.array(list(a['rr_weighted_features'].values[usable]))

    # Baseline rows: feature 2 strictly between its 60th and 99th percentile.
    lower = np.percentile(feature_matrix[:,2],60)
    upper = np.percentile(feature_matrix[:,2],99)
    index = np.where((feature_matrix[:,2]>lower)&(feature_matrix[:,2]<upper))[0]
    if index.size==0:
        # No baseline to normalise against (e.g. constant feature 2); the
        # weighted average would raise on an empty selection.
        a['stress_likelihood_ppg'] = np.nan
        return a

    for col in range(feature_matrix.shape[1]):
        m,s = weighted_avg_and_std(feature_matrix[index,col], quals1[index])
        feature_matrix[:,col] = (feature_matrix[:,col] - m)/s

    # Load the model only once it is needed, and close the file afterwards.
    # NOTE: unpickling executes code from the file; use trusted model files only.
    with open(model_path,'rb') as model_file:
        clf = pickle.load(model_file)
    probs = clf.predict_proba(feature_matrix)[:,1]

    likelihood = np.full(len(a),np.nan)
    likelihood[usable] = probs
    a = a.reset_index(drop=True)
    a['stress_likelihood_ppg'] = likelihood
    return a

def get_ecg_stress(a, model_path='../models/stress_ecg_final.p'):
    """
    Add a 'stress_likelihood_ecg' column computed from 'ecg_features'.

    a          -- DataFrame with 'window' and 'ecg_features' (one feature
                  vector per row, null where no ECG features exist).
    model_path -- pickled classifier exposing predict_proba; column 1 of its
                  output is used as the likelihood.

    Rows with a null 'window' or 'ecg_features' get NaN. Features are
    standardised per column with the mean/std of the baseline rows, i.e. rows
    whose feature 2 lies strictly between the day's 60th and 99th percentile
    of that feature.

    Returns `a` itself with an all-NaN column when fewer than 10 rows are
    usable or when no baseline row exists; otherwise returns a new frame with
    a fresh 0..n-1 index, the same rows in the same order, and the new column
    appended.

    The likelihoods are written back by row position rather than by merging
    on 'window', so duplicated window values no longer multiply rows. An
    existing 'stress_likelihood_ecg' column is overwritten.
    """
    usable = a[['window','ecg_features']].notna().all(axis=1).values
    if np.count_nonzero(usable)<10:
        a['stress_likelihood_ecg'] = np.nan
        return a

    feature_matrix = np.array(list(a['ecg_features'].values[usable]))

    # Baseline rows: feature 2 strictly between its 60th and 99th percentile.
    lower = np.percentile(feature_matrix[:,2],60)
    upper = np.percentile(feature_matrix[:,2],99)
    index = np.where((feature_matrix[:,2]>lower)&(feature_matrix[:,2]<upper))[0]
    if index.size==0:
        # Mean/std of an empty selection would be NaN and poison every feature.
        a['stress_likelihood_ecg'] = np.nan
        return a

    means = np.mean(feature_matrix[index],axis=0)
    stds = np.std(feature_matrix[index],axis=0)
    feature_matrix = (feature_matrix - means)/stds

    # Load the model only once it is needed, and close the file afterwards.
    # NOTE: unpickling executes code from the file; use trusted model files only.
    with open(model_path,'rb') as model_file:
        clf = pickle.load(model_file)
    probs = clf.predict_proba(feature_matrix)[:,1]

    likelihood = np.full(len(a),np.nan)
    likelihood[usable] = probs
    a = a.reset_index(drop=True)
    a['stress_likelihood_ecg'] = likelihood
    return a
    
def parse_each_participant(directory,d,output_directory=None):
    """
    Process one participant pickle and write the result under the same name.

    directory        -- folder containing the input pickle.
    d                -- file name of the participant pickle inside `directory`.
    output_directory -- folder for the result; when omitted, the module-level
                        `directory1` is used (the original behaviour).

    The input frame is split per (user, day), every day is processed in a
    joblib worker, the non-empty results are concatenated and dumped as
    [final_output, ema], where `ema` holds the score/label columns of the
    unprocessed input. Nothing is written when no day qualifies or every day
    comes back empty.

    Always returns 0.
    """
    if output_directory is None:
        output_directory = directory1

    # NOTE: unpickling executes code from the file; only read trusted data.
    with open(os.path.join(directory,d),'rb') as source:
        data = pickle.load(source).reset_index(drop=True)
    print(data.shape,d)

    ema = data[['user','day','window','time','ltime','all_scores','score','label']]
    data = data.drop(['all_scores','score','label'],axis=1)

    data_all = get_daywise(data)
    if not data_all:
        return 0

    final_output = Parallel(n_jobs=25,verbose=4)(delayed(parse_each_day_ppg_ecg)(a) for a in data_all)
    final_output = [a for a in final_output if a.shape[0]>0]
    if not final_output:
        return 0

    final_output = pd.concat(final_output)
    final_output['stress_likelihood_ppg_qual'] = final_output['stress_likelihood_ppg']
    print(final_output.shape)

    # Close (and therefore flush) the output file deterministically.
    with open(os.path.join(output_directory,d),'wb') as sink:
        pickle.dump([final_output,ema],sink)
    return 0

# `directory` holds the per-participant input pickles; `directory1` is read by
# parse_each_participant as the folder the results are dumped into.
directory = '../../cc3/rice_data/ecg_ppg_25_left3/'
directory1 = '../../cc3/rice_data/ecg_ppg_25_left5/'
# Participants are handled one after another; each call already fans its days
# out to joblib workers, so the participant loop itself stays serial.
all_data = [
    parse_each_participant(directory,file_name)
    for file_name in os.listdir(directory)
    if file_name.endswith('p')
]

(16888, 20) 35109a64-411d-4768-9602-c0a3d519a088.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    5.0s remaining:   32.4s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    5.7s remaining:    8.6s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    6.0s remaining:    3.0s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    6.6s finished


(14140, 31)
(5739, 20) 9b3e5f2e-99e8-4c4c-8580-6f4e2b107e37.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    3.1s remaining:    3.1s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    4.7s finished


(3869, 31)
(12828, 20) 3febca74-f12b-4a1b-a469-22d6cad30e74.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    3.4s remaining:    7.7s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    3.8s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    5.2s finished


(11256, 31)
(4424, 20) f35c1279-806e-4546-839b-037ee01b0116.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   7 | elapsed:    1.4s remaining:    3.5s
[Parallel(n_jobs=25)]: Done   4 out of   7 | elapsed:    1.8s remaining:    1.4s
[Parallel(n_jobs=25)]: Done   7 out of   7 | elapsed:    3.8s finished


(4186, 31)
(9946, 20) 19223eac-7f2e-429c-8fa7-0977eee8ae7c.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    2.1s remaining:    3.8s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    3.1s remaining:    1.7s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    4.3s finished


(7853, 31)
(16491, 20) 5a47080d-a1ec-48f2-a174-a6a017fcb100.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  16 | elapsed:    2.9s remaining:   20.0s
[Parallel(n_jobs=25)]: Done   7 out of  16 | elapsed:    3.6s remaining:    4.6s
[Parallel(n_jobs=25)]: Done  12 out of  16 | elapsed:    4.2s remaining:    1.4s
[Parallel(n_jobs=25)]: Done  16 out of  16 | elapsed:    4.7s finished


(13428, 31)
(7053, 20) 099b45df-6432-47d2-8332-a8a870ec79de.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    3.0s remaining:    3.0s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    4.7s finished


(5985, 31)
(6163, 20) 903c1dae-a771-405f-a021-6f175724adc4.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    2.7s remaining:    2.7s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    4.4s finished


(5245, 31)
(9636, 20) ad1e878e-7bea-48ca-b890-92770fe02a4c.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.4s remaining:    7.1s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.1s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    4.2s finished


(8426, 31)
(7309, 20) d768791c-7479-4aaf-a6c8-61e82f1517e8.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    1.7s remaining:    3.8s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    2.3s remaining:    1.5s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    3.6s finished


(5060, 31)
(5938, 20) 780e89b3-f6bf-4181-9f2c-7db941735c87.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of   8 | elapsed:    2.8s remaining:    2.8s
[Parallel(n_jobs=25)]: Done   8 out of   8 | elapsed:    4.2s finished


(4939, 31)
(7042, 20) 8008f00d-2549-46e4-ab1f-01542c1076e2.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  10 | elapsed:    1.7s remaining:    4.1s
[Parallel(n_jobs=25)]: Done   6 out of  10 | elapsed:    2.0s remaining:    1.3s
[Parallel(n_jobs=25)]: Done  10 out of  10 | elapsed:    3.7s finished


(5646, 31)
(12353, 20) 34e42cf6-7c34-417c-a003-874e3b6151e7.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  11 | elapsed:    3.5s remaining:    6.1s
[Parallel(n_jobs=25)]: Done   7 out of  11 | elapsed:    3.8s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  11 out of  11 | elapsed:    5.2s finished


(10175, 31)
(14518, 20) e099e913-4796-4408-af63-1d35c84f29fd.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    1.9s remaining:   12.3s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    3.3s remaining:    4.9s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    3.9s remaining:    2.0s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.3s finished


(11978, 31)
(18852, 20) d3cf5812-85fd-4328-9b2a-1b3b6b2cd0b0.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.4s remaining:   22.3s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.3s remaining:    6.5s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.7s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.7s finished


(15558, 31)
(5743, 20) b0954814-ad5a-4a8f-ac5a-8436a70889d0.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of   8 | elapsed:    1.9s remaining:    1.9s
[Parallel(n_jobs=25)]: Done   8 out of   8 | elapsed:    3.8s finished


(4398, 31)
(6153, 20) 0c2f18b6-142e-4e9a-ab1b-4d4ea2e91280.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   7 | elapsed:    1.5s remaining:    3.8s
[Parallel(n_jobs=25)]: Done   4 out of   7 | elapsed:    2.2s remaining:    1.7s
[Parallel(n_jobs=25)]: Done   7 out of   7 | elapsed:    4.2s finished


(5688, 31)
(7406, 20) bde40f50-8e35-4707-8260-b69f07773c4d.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.1s remaining:    6.3s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    2.4s remaining:    1.7s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    3.2s finished


(4108, 31)
(11715, 20) 98fca87c-9940-4666-9893-9f8ae2418cb8.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  11 | elapsed:    3.5s remaining:    6.2s
[Parallel(n_jobs=25)]: Done   7 out of  11 | elapsed:    4.0s remaining:    2.3s
[Parallel(n_jobs=25)]: Done  11 out of  11 | elapsed:    4.4s finished


(8905, 31)
(10856, 20) 780f84d0-eb06-4696-a47d-9320bc17d117.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    2.5s remaining:    5.7s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    3.5s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    4.2s finished


(8597, 31)
(16131, 20) 808b555a-6573-457e-b1c6-e008594b0f9a.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    3.7s remaining:    6.6s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    4.3s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    4.9s finished


(12982, 31)
(11479, 20) cd575a70-f1a4-4a2b-ac0d-dd0c330a7912.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   9 | elapsed:    3.4s remaining:   11.8s
[Parallel(n_jobs=25)]: Done   5 out of   9 | elapsed:    3.8s remaining:    3.1s
[Parallel(n_jobs=25)]: Done   9 out of   9 | elapsed:    4.4s finished


(8168, 31)
(10631, 20) bb0fe5f5-798b-45ea-be50-8e56e3116369.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.2s remaining:   14.4s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    2.6s remaining:    3.9s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    3.2s remaining:    1.6s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    3.8s finished


(8878, 31)
(6646, 20) a19eb22e-8c99-42d5-8a68-caad1dfe9361.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    2.1s remaining:    2.1s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    3.7s finished


(3579, 31)
(7326, 20) 9998aedf-5144-4402-9806-fd0965ca85c0.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  11 | elapsed:    2.2s remaining:    3.9s
[Parallel(n_jobs=25)]: Done   7 out of  11 | elapsed:    2.6s remaining:    1.5s
[Parallel(n_jobs=25)]: Done  11 out of  11 | elapsed:    3.4s finished


(4792, 31)
(12415, 20) f0286b14-18d2-46bd-845e-f83b43a2ef7b.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    2.9s remaining:    6.5s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    3.6s remaining:    2.3s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    4.8s finished


(9575, 31)
(6530, 20) a94e78e6-acb4-4a71-920a-92b1858d51bd.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   9 | elapsed:    1.5s remaining:    5.4s
[Parallel(n_jobs=25)]: Done   5 out of   9 | elapsed:    2.5s remaining:    2.0s
[Parallel(n_jobs=25)]: Done   9 out of   9 | elapsed:    3.5s finished


(4420, 31)
(16217, 20) 5f3f7553-6d2f-4c08-adb9-dbc3e88ba0aa.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.7s remaining:   17.5s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    3.8s remaining:    5.7s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.4s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    4.9s finished


(14414, 31)
(7128, 20) b71b2071-6330-434d-a2ab-8e929e9b96a9.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    1.4s remaining:    9.2s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    1.7s remaining:    2.5s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    2.1s remaining:    1.0s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    2.7s finished


(5001, 31)
(12162, 20) b53e7168-0a87-4646-b389-fb0fe60cc36a.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    3.2s remaining:    5.7s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    3.7s remaining:    2.0s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    4.4s finished


(10374, 31)
(17308, 20) 9197be51-f220-4c63-a6a8-3ec1bbd50810.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.8s remaining:   18.4s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    3.8s remaining:    5.7s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    5.2s remaining:    2.6s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    6.1s finished


(15924, 31)
(12585, 20) 892e71e0-a5a4-4315-89a4-fa5518d78591.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.8s remaining:    8.3s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.5s remaining:    2.5s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    4.7s finished


(7749, 31)
(9642, 20) 96f6e25f-4dd0-4070-a9ac-b04957969382.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.4s remaining:    7.1s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.0s remaining:    2.1s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    4.0s finished


(5166, 31)
(14686, 20) cfe02b15-0332-4590-9ac6-9ef2eb8b3edd.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    3.5s remaining:    7.8s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    4.3s remaining:    2.7s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    5.2s finished


(11343, 31)
(11054, 20) 896d9cb5-2e54-4900-9b8a-58c087549d19.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.5s remaining:    7.5s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.7s remaining:    2.6s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    4.6s finished


(9217, 31)
(16020, 20) ea2fa266-3e43-4552-8c74-cba474ae0038.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    3.6s remaining:    6.5s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    4.0s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    5.0s finished


(13447, 31)
(7390, 20) 263b1782-923d-4bb3-b52d-4c1926e81f1f.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   9 | elapsed:    1.5s remaining:    5.4s
[Parallel(n_jobs=25)]: Done   5 out of   9 | elapsed:    3.0s remaining:    2.4s
[Parallel(n_jobs=25)]: Done   9 out of   9 | elapsed:    4.0s finished


(6121, 31)
(10829, 20) 9bc2eed3-f75d-479c-9665-75df853bc8ac.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   9 | elapsed:    3.5s remaining:   12.3s
[Parallel(n_jobs=25)]: Done   5 out of   9 | elapsed:    3.8s remaining:    3.1s
[Parallel(n_jobs=25)]: Done   9 out of   9 | elapsed:    4.6s finished


(8446, 31)
(11825, 20) 9744e4ae-63d8-49df-be6a-37cbb24532a1.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.3s remaining:   14.7s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    2.8s remaining:    4.1s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    3.4s remaining:    1.7s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    4.9s finished


(9416, 31)
(11843, 20) 2d8b5a8c-e990-4442-abf6-578e96d2f5eb.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    2.4s remaining:    5.3s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    3.5s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    4.4s finished


(8175, 31)
(11461, 20) 897fdfcf-9004-4ef6-bf9a-8d3fe339c8ce.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.6s remaining:    7.8s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.3s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    4.9s finished


(9321, 31)
(19188, 20) 9a3bd464-f273-4f97-a48c-1f3c6a705a69.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.7s remaining:   23.9s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    5.7s remaining:    8.5s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    5.9s remaining:    3.0s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    6.2s finished


(17016, 31)
(10692, 20) 6ef875b3-2f7e-48b8-bf00-de3ee1316830.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  13 | elapsed:    2.3s remaining:    5.3s
[Parallel(n_jobs=25)]: Done   8 out of  13 | elapsed:    3.2s remaining:    2.0s
[Parallel(n_jobs=25)]: Done  13 out of  13 | elapsed:    4.4s finished


(9246, 31)
(10425, 20) 05846fcf-1dd9-4f98-b17b-1ce6e624c0a7.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    2.5s remaining:    4.5s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    3.3s remaining:    1.9s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    4.2s finished


(9075, 31)
(10883, 20) 4bf6078d-afcd-432a-a8a5-8b5e8a4eda9e.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    2.9s remaining:    5.2s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    3.4s remaining:    1.9s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    4.7s finished


(205360, 31)
(14152, 20) 2333036a-2f50-49ca-a119-3c5d66399fe4.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   4 out of  11 | elapsed:    4.5s remaining:    7.9s
[Parallel(n_jobs=25)]: Done   7 out of  11 | elapsed:    5.0s remaining:    2.9s
[Parallel(n_jobs=25)]: Done  11 out of  11 | elapsed:    6.3s finished


(10920, 31)
(15306, 20) 22c85326-97bf-4e4b-90c4-4255c144ae1b.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.1s remaining:   19.8s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.3s remaining:    6.4s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.4s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.0s finished


(10780, 31)
(17393, 20) f8d33ca1-e0fa-4b59-a7c2-b1aee8afcaea.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.8s remaining:   18.0s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    3.5s remaining:    5.3s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.7s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.4s finished


(11955, 31)
(15704, 20) c64ca471-369e-43fa-a07b-8260fd1c745c.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    2.9s remaining:   18.6s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.0s remaining:    6.0s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.4s remaining:    2.2s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    5.3s finished


(12728, 31)
(16931, 20) fdddb3bd-bb88-458f-bcc8-e50bb3f87742.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:    4.4s remaining:    7.8s
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:    4.7s remaining:    2.6s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:    5.5s finished


(13373, 31)
(12546, 20) 02543bbf-84c2-4076-8547-c8a5f451ea02.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.6s remaining:    7.9s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.9s remaining:    2.8s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    5.1s finished


(9667, 31)
(19842, 20) 0c726695-f016-4019-9aab-c292298ee10c.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.6s remaining:   23.3s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.5s remaining:    6.7s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    5.0s remaining:    2.5s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    6.2s finished


(15252, 31)
(17238, 20) 87a2bf88-ef4e-4bd5-96b6-eda8faac6a8e.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  16 | elapsed:    2.9s remaining:   20.2s
[Parallel(n_jobs=25)]: Done   7 out of  16 | elapsed:    4.1s remaining:    5.3s
[Parallel(n_jobs=25)]: Done  12 out of  16 | elapsed:    4.7s remaining:    1.6s
[Parallel(n_jobs=25)]: Done  16 out of  16 | elapsed:    5.4s finished


(13056, 31)
(18657, 20) 8d96c9a4-a13b-4729-adf3-969e84b9a6d2.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of  15 | elapsed:    3.7s remaining:   24.0s
[Parallel(n_jobs=25)]: Done   6 out of  15 | elapsed:    4.0s remaining:    5.9s
[Parallel(n_jobs=25)]: Done  10 out of  15 | elapsed:    4.8s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  15 out of  15 | elapsed:    6.0s finished


(13101, 31)
(2315, 20) e41ecb05-e159-4abc-858e-1d305706e8fc.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    1.2s remaining:    1.2s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    1.9s finished


(1273, 31)
(4156, 20) 0d4e87ed-9694-4ba8-b529-d8f2def047cf.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   7 | elapsed:    1.4s remaining:    3.6s
[Parallel(n_jobs=25)]: Done   4 out of   7 | elapsed:    1.7s remaining:    1.2s
[Parallel(n_jobs=25)]: Done   7 out of   7 | elapsed:    2.9s finished


(4017, 31)
(3252, 20) b62147b5-0b26-4490-acf3-6e1fd29f909c.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of   6 | elapsed:    1.4s remaining:    1.4s
[Parallel(n_jobs=25)]: Done   6 out of   6 | elapsed:    2.4s finished


(2441, 31)
(10844, 20) 4ddc3405-d256-4f66-95df-2b13bf69a616.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:    2.3s remaining:    6.8s
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:    3.3s remaining:    2.4s
[Parallel(n_jobs=25)]: Done  12 out of  12 | elapsed:    3.8s finished


(8082, 31)
(7352, 20) 3ca9773c-2c8b-46da-a68c-909685ddcf08.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   7 | elapsed:    2.3s remaining:    5.7s
[Parallel(n_jobs=25)]: Done   4 out of   7 | elapsed:    2.7s remaining:    2.0s
[Parallel(n_jobs=25)]: Done   7 out of   7 | elapsed:    4.2s finished


(6806, 31)
(3726, 20) 19e66451-4521-447a-ae3b-d91cb814b6c5.p


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   2 out of   5 | elapsed:    1.4s remaining:    2.1s
[Parallel(n_jobs=25)]: Done   5 out of   5 | elapsed:    3.8s finished


(3213, 31)


In [None]:
from scipy.stats import iqr,skew,kurtosis
from datetime import datetime
from copy import deepcopy
import math
from scipy.stats import pearsonr
from sklearn.externals.joblib import Parallel,delayed
import warnings
import pandas as pd
import pickle
import os
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Suppress every warning for the rest of the session; note this also hides
# diagnostics such as pandas chained-assignment or numpy divide warnings.
warnings.filterwarnings('ignore')

def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted standard deviation.

    values, weights -- Numpy ndarrays with the same shape.

    Returns a ``(mean, std)`` tuple, where ``std`` is the square root of the
    weighted average of the squared deviations from the weighted mean.
    """
    mean = np.average(values, weights=weights)
    squared_deviation = (values - mean) ** 2
    return mean, math.sqrt(np.average(squared_deviation, weights=weights))

def get_rr_features(a):
    """
    Summarise an array of RR values as a 7-element feature vector.

    Order of the returned entries: variance, interquartile range, mean,
    median, 80th percentile, 20th percentile and 60000 / median
    (presumably beats per minute if the values are in milliseconds -- confirm).
    """
    median = np.median(a)
    features = [
        np.var(a),
        iqr(a),
        np.mean(a),
        median,
        np.percentile(a, 80),
        np.percentile(a, 20),
        60000 / median,
    ]
    return np.array(features)


def get_weighted_rr_features(a):
    """
    Compute the same 7 statistics as ``get_rr_features`` on a weighted sample.

    a -- 2-D array; column 0 holds the values and column 1 their weights.

    Each value is repeated ``round(100 * weight)`` times, and the statistics
    (variance, IQR, mean, median, 80th and 20th percentile, 60000 / median)
    are taken over the expanded array.
    """
    repeat_counts = np.int64(np.round(100 * a[:, 1]))
    expanded = np.repeat(a[:, 0], repeat_counts)
    median = np.median(expanded)
    features = [
        np.var(expanded),
        iqr(expanded),
        np.mean(expanded),
        median,
        np.percentile(expanded, 80),
        np.percentile(expanded, 20),
        60000 / median,
    ]
    return np.array(features)


def get_quality_features(a):
    """
    Build a 4-element quality summary from an array of likelihood values.

    Entries: 50th percentile, mean, number of values above 0.2 divided by 60
    and number of values above 0.6 divided by 60.
    """
    above_low = np.count_nonzero(a > .2)
    above_high = np.count_nonzero(a > .6)
    return np.array([np.percentile(a, 50), np.mean(a), above_low / 60, above_high / 60])

def get_daywise(data):
    """
    Split ``data`` into one DataFrame per (user, day) pair.

    A group is kept only when more than 60 of its rows have both
    ``likelihood_max_array`` and ``rr_array`` present (non-null).
    """
    required = ['likelihood_max_array', 'rr_array']
    day_frames = []
    for _, group in data.groupby(['user', 'day'], as_index=False):
        if group[required].dropna().shape[0] > 60:
            day_frames.append(group)
    return day_frames

def parse_day_data(data_day):
    """
    Reshape one day's raw arrays and pick, per sample, the RR candidate with
    the highest likelihood.

    For every row, ``likelihood_max_array`` and ``rr_array`` are squeezed and
    reshaped to three columns.  ``likelihood`` holds the row-wise maximum of
    the likelihood array and ``likelihood_ind`` the column where it occurs;
    ``rr`` holds the entry of ``rr_array`` found at that column.  Rows whose
    ``rr_array`` has 20 or fewer samples are dropped.

    data_day -- DataFrame with ``likelihood_max_array``, ``rr_array``,
                ``ltime`` and ``activity`` columns.  As before, the reshaped
                arrays and the ``likelihood``, ``likelihood_ind`` and
                ``length`` columns are also written onto this frame.

    Returns a new DataFrame (possibly empty) with the extra columns ``time``
    (``datetime.timestamp`` of ``ltime``), ``rr`` and ``rr_col`` (rr,
    likelihood and activity stacked as the columns of one array per row).
    """
    data_day['likelihood_max_array'] = data_day['likelihood_max_array'].apply(
        lambda a: np.squeeze(a).reshape(-1, 3))
    data_day['likelihood'] = data_day['likelihood_max_array'].apply(lambda a: np.max(a, axis=1))
    data_day['likelihood_ind'] = data_day['likelihood_max_array'].apply(lambda a: np.argmax(a, axis=1))
    data_day['rr_array'] = data_day['rr_array'].apply(lambda a: np.squeeze(a).reshape(-1, 3))
    data_day['length'] = data_day['rr_array'].apply(lambda a: a.shape[0])

    # Take an explicit copy of the filtered rows so the assignments below
    # write to an independent frame instead of a slice of the input.
    data_day = data_day[data_day['length'] > 20].copy()
    data_day['time'] = data_day['ltime'].apply(lambda a: datetime.timestamp(a))

    # For each window, select the candidate column with the highest likelihood
    # in every sample (row) of the (n, 3) rr array.
    selected = []
    for best, candidates in zip(data_day['likelihood_ind'].values, data_day['rr_array'].values):
        n_rows = candidates.shape[0]
        selected.append(np.squeeze(candidates[np.arange(n_rows), best[:n_rows]]))
    data_day['rr'] = selected

    # Built with a plain loop rather than DataFrame.apply(axis=1) so the result
    # is always one array per row, whatever the number of rows (including 0).
    data_day['rr_col'] = [
        np.vstack([np.squeeze(rr), np.squeeze(lik), np.squeeze(act)]).T
        for rr, lik, act in zip(data_day['rr'], data_day['likelihood'], data_day['activity'])
    ]
    return data_day

def remove_3sd(heart_rate_window):
    """
    Drop rows lying 3 or more weighted standard deviations from the weighted mean.

    heart_rate_window -- 2-D ndarray; column 0 holds the values, column 1 the
                         weight of each value.

    The weighted mean and standard deviation are computed only from rows
    whose weight exceeds 0.25, then every row whose value is strictly inside
    ``mean +/- 3 * std`` is kept.  If the statistics cannot be computed (for
    example no row has a weight above 0.25) the window is left unfiltered.

    Returns ``[window, 'Available']`` when more than 10 rows remain, otherwise
    ``[first 10 rows of the original window, 'Not Available']``.
    """
    original = heart_rate_window
    try:
        trusted = heart_rate_window[:, 1] > .25
        center, spread = weighted_avg_and_std(heart_rate_window[trusted, 0],
                                              heart_rate_window[trusted, 1])
        values = heart_rate_window[:, 0]
        inside = (values < center + 3 * spread) & (values > center - 3 * spread)
        heart_rate_window = heart_rate_window[np.where(inside)[0]]
    except (ZeroDivisionError, ValueError, IndexError):
        # Best effort: np.average raises ZeroDivisionError when the selected
        # weights sum to zero; ValueError/IndexError cover degenerate shapes.
        # In all of these cases the window is simply kept unfiltered.
        pass
    if heart_rate_window.shape[0] > 10:
        return [heart_rate_window, 'Available']
    # Copy only the 10 rows that are returned instead of the whole window.
    return [deepcopy(original[:10]), 'Not Available']

    
def parse_for_features(data_day):
    """
    Filter each window's samples and derive the RR and quality features.

    data_day -- DataFrame whose ``rr_col`` column holds one array per row with
                the columns (rr, likelihood, activity), as produced by
                ``parse_day_data``.  As before, the filtered ``rr_col`` and
                the ``length1`` column are also written onto this frame.

    Per row: samples are kept when likelihood > 0.05, 300 < rr < 1500 and
    activity < 0.2; outliers are then removed with ``remove_3sd``.  Rows left
    with 30 or fewer samples are dropped.

    Returns a new DataFrame (possibly empty) with the columns ``indicator``,
    ``rr_col``, ``likelihood``, ``rr``, ``rr_col1``, ``rr_features``,
    ``rr_weighted_features``, ``quality_features`` and ``quality_mag``.
    """
    def _keep_valid(a):
        # Columns: 0 = rr, 1 = likelihood, 2 = activity; only rr and likelihood are kept.
        valid = (a[:, 1] > .05) & (a[:, 0] > 300) & (a[:, 0] < 1500) & (a[:, 2] < .2)
        return a[np.where(valid)[0], :2]

    data_day['rr_col'] = data_day['rr_col'].apply(_keep_valid)
    # remove_3sd returns [window, status]; both parts are unpacked further down.
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a: remove_3sd(a))
    data_day['length1'] = data_day['rr_col'].apply(lambda a: a[0].shape[0])

    # Explicit copy so the assignments below do not target a slice of the input.
    data_day = data_day[data_day['length1'] > 30].copy()
    data_day['indicator'] = data_day['rr_col'].apply(lambda a: a[1])
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a: a[0])
    data_day['likelihood'] = data_day['rr_col'].apply(lambda a: a[:, 1])
    data_day['rr'] = data_day['rr_col'].apply(lambda a: a[:, 0])
    # Plain loop instead of DataFrame.apply(axis=1): apply returns a DataFrame
    # for an empty frame, which cannot be assigned to a single column.
    data_day['rr_col1'] = [
        np.vstack([list(rr), list(lik)]).T
        for rr, lik in zip(data_day['rr'], data_day['likelihood'])
    ]
    data_day['rr_features'] = data_day['rr'].apply(lambda a: get_rr_features(a))
    data_day['rr_weighted_features'] = data_day['rr_col1'].apply(lambda a: get_weighted_rr_features(a))
    data_day['quality_features'] = data_day['likelihood'].apply(lambda a: get_quality_features(a))
    # Mean of the quality feature vector.
    data_day['quality_mag'] = data_day['quality_features'].apply(lambda a: np.sum(a) / len(a))
    return data_day

def normalize_daywise(feature_matrix,quals1):
    """
    Standardise every column of ``feature_matrix`` in place.

    Each column has its weighted mean subtracted and is divided by its
    weighted standard deviation, both computed with ``quals1`` as weights.

    Returns the same (modified) ``feature_matrix`` object.
    """
    for col in range(feature_matrix.shape[1]):
        column = feature_matrix[:, col]
        center, spread = weighted_avg_and_std(column, quals1)
        feature_matrix[:, col] = (column - center) / spread
    return feature_matrix


def parse_day_data_ecg(data_day):
    """
    Compute ECG RR features for the rows of one day that have ECG data.

    Keeps the ``ecg_rr_array``, ``ltime`` and ``window`` columns, drops rows
    with missing values and rows whose ``ecg_rr_array`` has 20 or fewer
    entries, then adds ``count_ecg``, ``ecg_rr_array_final`` (same values as
    ``ecg_rr_array``) and ``ecg_features`` (``get_rr_features`` per row).
    """
    ecg = data_day[['ecg_rr_array', 'ltime', 'window']].dropna()
    ecg['count_ecg'] = ecg['ecg_rr_array'].apply(len)
    enough_beats = ecg['count_ecg'] > 20
    ecg = ecg[enough_beats]
    ecg['ecg_rr_array_final'] = ecg['ecg_rr_array']
    ecg['ecg_features'] = ecg['ecg_rr_array_final'].apply(get_rr_features)
    return ecg

def parse_each_day_ppg_ecg(a):
    """
    Build the combined PPG/ECG feature table for one (user, day) frame.

    a -- DataFrame for a single day.  It must contain
         ``stress_likelihood`` and ``stress_likelihood_ecg`` (both dropped
         here) plus the columns read by ``parse_day_data``,
         ``parse_for_features`` and ``parse_day_data_ecg``.

    ECG features are computed only when more than 60 rows have a non-null
    ``ecg_rr_array``; they are left-joined onto the PPG rows by ``window``,
    so PPG rows without ECG get NaN in the ECG columns.

    Returns a DataFrame restricted to ``columns`` below; it is empty (same
    columns) when no PPG row survives the filtering.
    """
    ecg_columns = ['window', 'ecg_rr_array_final', 'ecg_features']
    # 'window' must appear only once here: listing it twice yields two
    # identically named columns, which makes later selections and merges on
    # 'window' ambiguous.
    columns = ['window', 'ltime', 'likelihood_max_array', 'activity', 'rr_array',
               'time', 'timestamp', 'likelihood_mean', 'localtime', 'ecg_rr_array',
               'day', 'version', 'user', 'quality_features', 'activity_features',
               'likelihood', 'likelihood_ind', 'length', 'rr', 'rr_col',
               'length1', 'indicator', 'rr_col1', 'rr_features',
               'rr_weighted_features', 'quality_mag', 'ecg_rr_array_final', 'ecg_features']
    a = a.drop(['stress_likelihood', 'stress_likelihood_ecg'], axis=1)

    # Default: no ECG rows; the left merge below then fills the ECG columns with NaN.
    a_ecg = pd.DataFrame([], columns=ecg_columns)
    if a['ecg_rr_array'].dropna().shape[0] > 60:
        # Work on a deep copy because parse_day_data below rewrites columns of `a`.
        a_ecg = parse_day_data_ecg(deepcopy(a))[ecg_columns]

    a_ppg = parse_for_features(parse_day_data(a))
    if a_ppg.shape[0] == 0:
        return pd.DataFrame([], columns=columns)
    a_ppg = pd.merge(a_ppg, a_ecg, how='left', on=['window'], suffixes=('', '_7'))
    return a_ppg[columns]

def get_both_stress(a):
    """
    Add the ECG-based stress likelihood to the combined feature table.

    a -- DataFrame with at least the ``window`` and ``ecg_features`` columns;
         ``ecg_features`` holds one feature vector per row (or NaN).

    Rows with ECG features are standardised with the mean and standard
    deviation of a reference subset (rows whose feature 2, the mean RR in
    ``get_rr_features`` order, lies strictly between its 60th and 99th
    percentile) and scored with the pickled classifier
    ``../models/stress_ecg_final.p``.

    Returns ``a`` left-merged on ``window`` with ``ecg_features`` and the new
    ``stress_likelihood_ecg`` column; rows without ECG features get NaN.  When
    no row has ECG features the model is not loaded and the column is all NaN.
    """
    a_ecg = deepcopy(a[['window', 'ecg_features']].dropna())
    print(a_ecg.columns, '--' * 20)
    if a_ecg.shape[0] == 0:
        # Nothing to score: keep the output schema without touching the model
        # (indexing the empty feature matrix would raise IndexError).
        a_ecg['stress_likelihood_ecg'] = np.nan
    else:
        # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
        with open('../models/stress_ecg_final.p', 'rb') as model_file:
            clf = pickle.load(model_file)
        feature_matrix = np.array(list(a_ecg['ecg_features']))
        mean_rr = feature_matrix[:, 2]
        lower = np.percentile(mean_rr, 60)
        upper = np.percentile(mean_rr, 99)
        reference = np.where((mean_rr > lower) & (mean_rr < upper))[0]
        means = np.mean(feature_matrix[reference], axis=0)
        stds = np.std(feature_matrix[reference], axis=0)
        feature_matrix = (feature_matrix - means) / stds
        # Probability of the second class reported by the classifier.
        a_ecg['stress_likelihood_ecg'] = clf.predict_proba(feature_matrix)[:, 1]
    # Drop the original column so the merge brings it back from a_ecg without a name clash.
    a = a.drop(['ecg_features'], axis=1)
    print(a_ecg.columns)
    a = pd.merge(a, a_ecg, how='left', on=['window'])
    return a
    
def parse_each_participant(directory,d,n_jobs=25):
    """
    Run the per-day PPG/ECG parsing and ECG stress scoring for one participant file.

    directory -- path prefix of the input folder (concatenated with ``d`` as is).
    d         -- file name of the participant's pickled DataFrame.
    n_jobs    -- number of joblib workers used to parse the days in parallel.

    Returns 0 in every case; the computed table is currently not returned or
    saved (the save step below is commented out).
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted data files.
    with open(directory+d,'rb') as data_file:
        data = pickle.load(data_file).reset_index(drop=True)
    # Kept for the (currently disabled) save step at the end of this function.
    ema = data[['user','day','window','time','ltime','all_scores','score','label']]
    data = data.drop(['all_scores','score','label'],axis=1)
    data_all = get_daywise(data)
    if len(data_all)==0:
        return 0
    final_output = Parallel(n_jobs=n_jobs,verbose=4)(delayed(parse_each_day_ppg_ecg)(a) for a in data_all)
    # Days for which no PPG row survived come back as empty frames.
    final_output = [a for a in final_output if a.shape[0]>0]
    if len(final_output)==0:
        return 0
    final_output = pd.concat(final_output)
    print(final_output.columns)
    final_output = get_both_stress(final_output)
#     pickle.dump([final_output,ema],open(directory1+d,'wb'))
    return 0
# Input folder holding one pickled DataFrame per file; directory1 is only
# referenced by the commented-out save step in parse_each_participant.
directory = '../../cc3/rice_data/ecg_ppg_25_left3/'
directory1 = '../../cc3/rice_data/ecg_ppg_25_left5/'
# Parallel variant, currently disabled:
# all_data = Parallel(n_jobs=30,verbose=2)(delayed(parse_each_participant)(directory,d) for d in os.listdir(directory)[:2] if d[-1]=='p')
# Process every file whose name ends in 'p', one after the other.
participant_files = [name for name in os.listdir(directory) if name.endswith('p')]
all_data = [parse_each_participant(directory, name) for name in participant_files]

In [None]:
# Stack elements 0, 1 and 2 of every entry in all_data across participants.
# NOTE(review): parse_each_participant in this notebook returns 0, so this
# cell presumably expects all_data from an earlier version -- confirm.
data1, yld, day_corr = (
    np.concatenate([entry[k] for entry in all_data]) for k in range(3)
)
# The first two columns of the stacked array go to yld1, the rest stay in yld.
yld1, yld = yld[:, :2], yld[:, 2:]

In [None]:
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':25})
# One box per column of yld, labelled below.
fig, ax = plt.subplots(figsize=(16,8))
ax.boxplot(yld)
ax.set_ylabel('Minutes')
ax.set_xticks(range(1,yld.shape[1]+1))
ax.set_xticklabels(['ECG YIELD','PPG YIELD'])
ax.set_title('Stress yield across all participant days')
plt.show()

In [None]:
print(day_corr.shape)
# Drop every row of day_corr that contains at least one NaN.
rows_with_nan = np.isnan(day_corr).any(axis=1)
day_corr = day_corr[~rows_with_nan]
# Elements 4 and 3 of each all_data entry are summed and reported as
# participant days and users respectively.
n_participant_days = np.sum([entry[4] for entry in all_data])
n_users = np.sum([entry[3] for entry in all_data])
print(n_participant_days,'- Participant Days,',n_users,'- Users')

In [None]:
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':20})
# One label per plotted column of day_corr (columns 0, 1 and 2).
box_labels = ['Original cStress','cStress with Weighted Features and weighted normalization','cStress with Weighted Features']
plt.figure(figsize=(16,8))
# Rows containing NaN are excluded before plotting.
plt.boxplot(day_corr[~np.isnan(day_corr).any(axis=1)][:,np.array([0,1,2])])
plt.ylim([-1,1])
plt.ylabel('Pearson Correlation')
# Tick positions follow the three boxes drawn above rather than
# day_corr.shape[1]: xticks needs exactly as many positions as labels.
plt.xticks(range(1,len(box_labels)+1),box_labels,rotation=10)
plt.title('Correlation with ECG For Different Modes of Normalization')
plt.show()

In [None]:
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':20})
# One label per plotted column of day_corr (columns 0, 1 and 2).
box_labels = ['Original cStress','cStress with Weighted Features and weighted normalization','cStress with Weighted Features']
plt.figure(figsize=(16,8))
# Rows containing NaN are excluded before plotting; the y-range is left automatic.
plt.boxplot(day_corr[~np.isnan(day_corr).any(axis=1)][:,np.array([0,1,2])])
plt.ylabel('Pearson Correlation')
# Tick positions follow the three boxes drawn above rather than
# day_corr.shape[1]: xticks needs exactly as many positions as labels.
plt.xticks(range(1,len(box_labels)+1),box_labels,rotation=10)
plt.title('Correlation with ECG For Different Modes of Normalization')
plt.show()

In [None]:
data_all = pd.DataFrame(data1,columns=['quality','corr_orig','corr_new','corr_new1','corr_between','ppg_yield','ecg_yield'])
data_all1 = pd.DataFrame(yld1,columns=['quality','ppg_yield'])

corr_25 = data_all.groupby('quality').quantile(.5)
x = corr_25.index.values
x1 = np.unique(data_all1['quality'].values)
y = []
for a in x1:
    y.append(data_all1[data_all1.quality>=a]['ppg_yield'].sum()/60/np.sum([a[4] for a in all_data]))

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams.update({'font.size':20})
fig, ax1 = plt.subplots(figsize=(20,12))
ax2 = ax1.twinx()
ax1.plot(x,corr_25['corr_orig'].loc[x],label='Original Cstress')
ax1.plot(x,corr_25['corr_new'].loc[x],label='Weighted Normalization with weighted features')
ax1.plot(x,corr_25['corr_new1'].loc[x],label='Original Cstress using Weighted Features')
# ax1.plot(x,corr_25['corr_between'].loc[x],label='Original Normalization using auto Features')
ax2.plot(x1,y,label='PPG Yield')
ax1.grid()
# ax1.plot(x,corr_75['corr_orig'].loc[x],label='Original 75th')
# ax1.plot(x,corr_75['corr_new'].loc[x],label='Weighted 75th')
ax1.legend(fontsize=20)
ax1.set_xlabel('Quality Metric')
ax2.set_ylabel('Median Hours per Participant Day', color='g')
ax1.set_ylabel('Median Correlation Across all Participant Days', color='b')
plt.show()
#  plt.figure(figsize=(16,8))

In [None]:
data_all = pd.DataFrame(data1,columns=['quality','corr_orig','corr_new','corr_new1','corr_between','ppg_yield','ecg_yield'])
data_all1 = pd.DataFrame(yld1,columns=['quality','ppg_yield'])

corr_25 = data_all.groupby('quality').quantile(.5)
x = corr_25.index.values
x1 = np.unique(data_all1['quality'].values)
y = []
for a in x1:
    y.append(data_all1[data_all1.quality>=a]['ppg_yield'].sum()/60/np.sum([a[4] for a in all_data]))

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams.update({'font.size':20})
fig, ax1 = plt.subplots(figsize=(20,12))
ax2 = ax1.twinx()
ax1.plot(x,corr_25['corr_orig'].loc[x],label='Original Cstress')
ax1.plot(x,corr_25['corr_new'].loc[x],label='Weighted Normalization with weighted features')
ax1.plot(x,corr_25['corr_new1'].loc[x],label='Original Cstress using Weighted Features')
# ax1.plot(x,corr_25['corr_between'].loc[x],label='Original Normalization using auto Features')
ax2.plot(x1,y,label='PPG Yield')
ax1.grid()
# ax1.plot(x,corr_75['corr_orig'].loc[x],label='Original 75th')
# ax1.plot(x,corr_75['corr_new'].loc[x],label='Weighted 75th')
ax1.legend(fontsize=20)
ax1.set_xlabel('Quality Metric')
ax2.set_ylabel('Median Hours per Participant Day', color='g')
ax1.set_ylabel('Median Correlation Across all Participant Days', color='b')
plt.show()


# plt.figure(figsize=(16,8))



In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(figsize=(16,8))
plt.suptitle('')
c = data_all.boxplot(column=['corr_new'], by='quality', ax=ax,showfliers=True)
plt.ylim([-3,1])
plt.xticks(rotation=100)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(figsize=(16,8))
plt.suptitle('')
c = data_all.boxplot(column=['ppg_yield'], by='quality', ax=ax)

In [None]:
# 25th and 75th percentile of every column per quality value; the .loc key
# selects the row for quality 0.2 at quantile 0.25 (assumes 0.2 is one of the
# quality values present -- confirm).
data_all.groupby('quality').quantile([.25,.75]).loc[(0.2, 0.25)]

In [None]:
import sklearn

In [None]:
# Print scikit-learn's version/environment report (useful when checking
# compatibility of the pickled models with the installed library).
sklearn.show_versions()

In [None]:
# Display the quality column of the yield table built from yld1.
data_all1['quality']

In [None]:
import pickle
# Load the pickled ECG stress model and display it as the cell result.
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
with open('../models/stress_model_ecg_2.p','rb') as model_file:
    stress_model_ecg_2 = pickle.load(model_file)
stress_model_ecg_2