In [16]:
from scipy.stats import iqr,skew,kurtosis
from datetime import datetime
from copy import deepcopy
import math
from scipy.stats import pearsonr
from joblib import Parallel,delayed
import warnings
import pandas as pd
import pickle
import os
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')

def weighted_avg_and_std(values, weights):
    """
    Return the weighted average and standard deviation.

    values, weights -- Numpy ndarrays with the same shape.
    """
    average = np.average(values, weights=weights)
    # Fast and numerically precise:
    variance = np.average((values-average)**2, weights=weights)
    return average, math.sqrt(variance)

def get_rr_features(a):
    return np.array([np.var(a),iqr(a),np.mean(a),np.median(a),np.percentile(a,80),np.percentile(a,20),60000/np.median(a)])


def get_weighted_rr_features(a):
    a = np.repeat(a[:,0],np.int64(np.round(100*a[:,1])))
    return np.array([np.var(a),iqr(a),np.mean(a),np.median(a),np.percentile(a,80),np.percentile(a,20),60000/np.median(a)])


def get_quality_features(a,n=60):
    feature = [np.percentile(a,50),np.mean(a),
               len(a[a>.2])/n,len(a[a>.6])/n]
    return np.array(feature)

def get_daywise(data):
    return [a for i,a in data.groupby(['user','day'],as_index=False) if a[['likelihood_max_array','rr_array']].dropna().shape[0]>60]

def parse_day_data(data_day):
    data_day['likelihood_max_array'] = data_day['likelihood_max_array'].apply(lambda a:np.squeeze(a).reshape(-1,3))
    data_day['likelihood'] = data_day['likelihood_max_array'].apply(lambda a:np.max(a,axis=1))
    data_day['likelihood_ind'] = data_day['likelihood_max_array'].apply(lambda a:np.argmax(a,axis=1))
    data_day['rr_array'] = data_day['rr_array'].apply(lambda a:np.squeeze(a).reshape(-1,3))
    data_day['length'] = data_day['rr_array'].apply(lambda a:a.shape[0])
    data_day = data_day[data_day.length>20]
    if data_day.shape[0]<30:
        return pd.DataFrame([],columns=data_day.columns)
    data_day['time'] = data_day['ltime'].apply(lambda a:datetime.timestamp(a))
    indexes = data_day['likelihood_ind'].values
    rr_arrays = data_day['rr_array'].values
    rrs = []
    for i,rr in enumerate(rr_arrays):
        index = indexes[i]
        frr = np.squeeze(np.array([rr[i,index[i]] for i in range(rr.shape[0])]))
        rrs.append(frr)
    data_day['rr'] = rrs
    data_day['rr_col'] = data_day.apply(lambda a: np.vstack([np.squeeze(a['rr']),np.squeeze(a['likelihood']),np.squeeze(a['activity'])]).T,
                     axis=1)
    return data_day

def remove_3sd(heart_rate_window):
    temp = deepcopy(heart_rate_window)
    try:
        r,tt = weighted_avg_and_std(heart_rate_window[heart_rate_window[:,1]>.25,0],heart_rate_window[heart_rate_window[:,1]>.25,1])
        index = np.where((heart_rate_window[:,0]<r+2*tt)&(heart_rate_window[:,0]>r-2*tt))[0]
        heart_rate_window = heart_rate_window[index]
    except:
        pass
    if heart_rate_window.shape[0]>10:
        return [heart_rate_window,'Available']
    else:
        return [temp[:10],'Not Available']


import numpy as np
from scipy import interpolate, signal
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import matplotlib.patches as mpatches
from collections import OrderedDict

def frequencyDomain(RRints,tmStamps, band_type = None, lf_bw = 0.11, hf_bw = 0.1, plot = 0):
    
    #Remove ectopic beats
    #RR intervals differing by more than 20% from the one proceeding it are removed
    NNs = []
    tss = []
    for c, rr in enumerate(RRints):        
        if abs(rr - RRints[c-1]) <= 0.20 * RRints[c-1]:
            NNs.append(rr)
            tss.append(tmStamps[c])
            
            
    frequency_range = np.linspace(0.001, 1, 10000)
    NNs = np.array(NNs)
    NNs = NNs - np.mean(NNs)
    result = signal.lombscargle(tss, NNs, frequency_range)
        
    #Pwelch w/ zero pad     
    fxx = frequency_range 
    pxx = result 
    
    vlf= (0.003, 0.04)
    lf = (0.04, 0.15)
    hf = (0.15, 0.4)
    
    plot_labels = ['VLF', 'LF', 'HF']
        
    if band_type == 'adapted':     
            
        vlf_peak = fxx[np.where(pxx == np.max(pxx[np.logical_and(fxx >= vlf[0], fxx < vlf[1])]))[0][0]] 
        lf_peak = fxx[np.where(pxx == np.max(pxx[np.logical_and(fxx >= lf[0], fxx < lf[1])]))[0][0]]
        hf_peak = fxx[np.where(pxx == np.max(pxx[np.logical_and(fxx >= hf[0], fxx < hf[1])]))[0][0]]
    
        peak_freqs =  (vlf_peak, lf_peak, hf_peak) 
            
        hf = (peak_freqs[2] - hf_bw/2, peak_freqs[2] + hf_bw/2)
        lf = (peak_freqs[1] - lf_bw/2, peak_freqs[1] + lf_bw/2)   
        vlf = (0.003, lf[0])
        
        if lf[0] < 0:
            print('***Warning***: Adapted LF band lower bound spills into negative frequency range')
            print('Lower thresold of LF band has been set to zero')
            print('Adjust LF and HF bandwidths accordingly')
            lf = (0, lf[1])        
            vlf = (0, 0)
        elif hf[0] < 0:
            print('***Warning***: Adapted HF band lower bound spills into negative frequency range')
            print('Lower thresold of HF band has been set to zero')
            print('Adjust LF and HF bandwidths accordingly')
            hf = (0, hf[1])        
            lf = (0, 0)        
            vlf = (0, 0)
            
        plot_labels = ['Adapted_VLF', 'Adapted_LF', 'Adapted_HF']

    df = fxx[1] - fxx[0]
    vlf_power = np.trapz(pxx[np.logical_and(fxx >= vlf[0], fxx < vlf[1])], dx = df)      
    lf_power = np.trapz(pxx[np.logical_and(fxx >= lf[0], fxx < lf[1])], dx = df)            
    hf_power = np.trapz(pxx[np.logical_and(fxx >= hf[0], fxx < hf[1])], dx = df)             
    totalPower = vlf_power + lf_power + hf_power
    
    #Normalize and take log
    vlf_NU_log = np.log((vlf_power / (totalPower - vlf_power)) + 1)
    lf_NU_log = np.log((lf_power / (totalPower - vlf_power)) + 1)
    hf_NU_log = np.log((hf_power / (totalPower - vlf_power)) + 1)
    lfhfRation_log = np.log((lf_power / hf_power) + 1)   
    
    freqDomainFeats = {'VLF_Power': vlf_NU_log, 'LF_Power': lf_NU_log,
                       'HF_Power': hf_NU_log, 'LF/HF': lfhfRation_log}
                       
    if plot == 1:
        #Plot option
        freq_bands = {'vlf': vlf, 'lf': lf, 'hf': hf}
        freq_bands = OrderedDict(sorted(freq_bands.items(), key=lambda t: t[0]))
        colors = ['lightsalmon', 'lightsteelblue', 'darkseagreen']
        fig, ax = plt.subplots(1)
        ax.plot(fxx, pxx, c = 'grey')
        plt.xlim([0, 0.40])
        plt.xlabel(r'Frequency $(Hz)$')
        plt.ylabel(r'PSD $(s^2/Hz$)')
        
        for c, key in enumerate(freq_bands):
            ax.fill_between(fxx[min(np.where(fxx >= freq_bands[key][0])[0]): max(np.where(fxx <= freq_bands[key][1])[0])],
                            pxx[min(np.where(fxx >= freq_bands[key][0])[0]): max(np.where(fxx <= freq_bands[key][1])[0])],
                            0, facecolor = colors[c])
            
        patch1 = mpatches.Patch(color = colors[0], label = plot_labels[2])
        patch2 = mpatches.Patch(color = colors[1], label = plot_labels[1])
        patch3 = mpatches.Patch(color = colors[2], label = plot_labels[0])
        plt.legend(handles = [patch1, patch2, patch3])
        plt.show()

    return freqDomainFeats
    
def get_features_all(a):
    tmp = list(frequencyDomain(np.array(a[:,0])/1000,np.cumsum(a[:,0])/1000).values())
    return np.array(list(get_weighted_rr_features(a))+list(tmp))

def parse_for_features(data_day):
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a:a[np.where((a[:,0]>300)&(a[:,0]<1500)&(a[:,2]<.2)&(a[:,1]>.05))[0],:2])
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a:remove_3sd(a))
    data_day['length1'] = data_day['rr_col'].apply(lambda a:a[0].shape[0])
    data_day = data_day[data_day.length1>20]
    nn = data_day.length1.max()
    if nn>60:
        nn = 120
    else:
        nn = 60
    print(data_day.shape,'rr')
    if data_day.shape[0]<15:
        return pd.DataFrame([],columns=data_day.columns)
    data_day['indicator'] = data_day['rr_col'].apply(lambda a:a[1])
    data_day['rr_col'] = data_day['rr_col'].apply(lambda a:a[0])
    data_day['likelihood'] = data_day['rr_col'].apply(lambda a:a[:,1])
    data_day['rr'] = data_day['rr_col'].apply(lambda a:a[:,0])
    data_day['rr_col1'] = data_day.apply(lambda a:np.vstack([list(a['rr']),list(a['likelihood'])]).T,axis=1)
    data_day['rr_features'] = data_day['rr'].apply(lambda a:get_rr_features(a))
    data_day['rr_weighted_features'] = data_day['rr_col1'].apply(lambda a:get_features_all(a))
    data_day['quality_features'] = data_day['likelihood'].apply(lambda a:get_quality_features(a,n=nn))
    data_day['quality_mag'] = data_day['quality_features'].apply(lambda a:np.sum(a)/len(a))
    return data_day

def normalize_daywise(feature_matrix,quals1):
    for i in range(feature_matrix.shape[1]):
        m,s = weighted_avg_and_std(feature_matrix[:,i], quals1)
        feature_matrix[:,i]  = (feature_matrix[:,i] - m)/s
    return feature_matrix

def smooth(y, box_pts=10):
    box = np.ones(box_pts)/box_pts
    y_smooth = np.convolve(y, box, mode='same')
    return y_smooth

def parse_day_data_ecg(data_day):
    data_day = data_day[['ecg_rr_array','ltime','window']].dropna()
    data_day['count_ecg'] = data_day['ecg_rr_array'].apply(lambda a:len(a))
    data_day = data_day[data_day.count_ecg>30]
    if data_day.shape[0]<30:
        return pd.DataFrame([],columns=['ecg_rr_array','ltime','window','count_ecg','ecg_rr_array_final','ecg_features'])
    data_day['ecg_rr_array_final'] = data_day['ecg_rr_array']
    data_day['ecg_features'] = data_day['ecg_rr_array_final'].apply(lambda a:np.array(list(get_rr_features(a))+list(frequencyDomain(np.array(a)/1000,
                                                                                                                           np.cumsum(a)/1000).values())))
    return data_day

from pandas.core.window import _flex_binary_moment, _Rolling_and_Expanding
import matplotlib.pyplot as plt

def weighted_mean(self, weights, **kwargs):
    weights = self._shallow_copy(weights)
    window = self._get_window(weights)

    def _get_weighted_mean(X, Y):
        X = X.astype('float64')
        Y = Y.astype('float64')
        sum_f = lambda x: x.rolling(window, self.min_periods, center=self.center).sum(**kwargs)
        return sum_f(X * Y) / sum_f(Y)

    return _flex_binary_moment(self._selected_obj, weights._selected_obj,
                               _get_weighted_mean, pairwise=True)

_Rolling_and_Expanding.weighted_mean = weighted_mean
def parse_each_day_ppg_ecg(a):
    try:
        columns = ['window', 'ltime', 'likelihood_max_array', 'activity', 'rr_array',
           'time', 'timestamp', 'likelihood_mean', 'localtime', 'ecg_rr_array',
           'day', 'version', 'user', 'quality_features', 'activity_features', 'likelihood', 'likelihood_ind', 'length', 'rr', 'rr_col',
           'length1', 'indicator', 'rr_col1', 'rr_features',
           'rr_weighted_features', 'quality_mag', 'ecg_rr_array_final', 'ecg_features']
        ecg_columns = ['window', 'ecg_rr_array_final','ecg_features']
    #     a = a.drop(['stress_likelihood', 'stress_likelihood_ecg'],axis=1)
        a_ecg = pd.DataFrame([],columns=ecg_columns)
        if a['ecg_rr_array'].dropna().shape[0]>60:
            a_ecg = parse_day_data_ecg(deepcopy(a))
            a_ecg = a_ecg[ecg_columns]
        a_ppg = parse_day_data(a)
        if a_ppg.shape[0]==0:
            return pd.DataFrame([],columns=columns)
        a_ppg = parse_for_features(a_ppg)
        if a_ppg.shape[0]==0:
            return pd.DataFrame([],columns=columns)
        a_ppg = pd.merge(a_ppg, a_ecg, how='left',left_on=['window'],right_on=['window'])
        if a_ppg.shape[0]<60:
            return pd.DataFrame([],columns=columns)
#         a_ppg = get_ecg_stress(a_ppg)
#         a_ppg = get_ppg_stress(a_ppg)
        return a_ppg
    except:
        return pd.DataFrame([],columns=columns)

def get_ppg_stress(a):
    clf = pickle.load(open('../models/stress_ppg_final.p','rb'))
    quals1 = np.array(list(a['quality_mag'].values))
    feature_matrix = np.array(list(a['rr_weighted_features']))
    if len(feature_matrix)<60:
        a['stress_likelihood_ppg'] = np.nan
        return a
    ss = np.repeat(feature_matrix[:,2],np.int64(np.round(100*quals1)))
    rr_70th = np.percentile(ss,30)
    rr_95th = np.percentile(ss,99.9)
    index = np.where((feature_matrix[:,2]>rr_70th)&(feature_matrix[:,2]<rr_95th))[0]
    for i in range(feature_matrix.shape[1]):
        m,s = weighted_avg_and_std(feature_matrix[index,i], quals1[index])
#         if i==2:
#             print(m,s)
        feature_matrix[:,i]  = (feature_matrix[:,i] - m)/s
    probs = clf.predict_proba(np.nan_to_num(feature_matrix))[:,1]
    a['stress_likelihood_ppg'] = probs
#     a1 = a[['time','stress_likelihood_ecg','quality_mag']].dropna().sort_values('time').reset_index(drop=True)
#     plt.figure(figsize=(16,8))
#     plt.plot(a1['time'],a1['stress_likelihood_ecg'],'*-k')
#     a1 = a[['time','stress_likelihood_ppg','quality_mag']].dropna().sort_values('time').reset_index(drop=True)
#     a1['stress_likelihood_ppg_qual'] = a1['stress_likelihood_ppg'].rolling(window = 11).weighted_mean(a['quality_mag'])
#     from sklearn.gaussian_process import GaussianProcessRegressor
#     from sklearn.gaussian_process.kernels import RBF
#     y = a1['stress_likelihood_ppg']
#     m = np.mean(y)
#     y = y 
#     X = a[['time','quality_mag']].values
#     X = StandardScaler().fit_transform(X)
#     X[:,0] = X[:,0] - np.mean(X[:,0])
#     print(X.shape)
#     gpr = GaussianProcessRegressor(kernel=RBF(length_scale=20),random_state=0).fit(X, y)
#     X_pred = X
#     X_pred[:,1] = np.mean(X[:,1])
#     y1 = gpr.predict(X_pred,return_std=False)
# #     plt.scatter(a1['time'],a1['stress_likelihood_ppg'],c=a['quality_mag'])
#     plt.plot(a1['time'],a1['stress_likelihood_ppg_qual'],'o-r')
#     plt.plot(a1['time'],y1,'o-k')
#     a = a.sort_values('time').reset_index(drop=True)
#     plt.bar(a1['time'],a1['quality_mag'],500,color='blue')
#     plt.plot(a['time'],a['stress_likelihood_ecg'],'s-k')
#     plt.show()
    return a

def get_ecg_stress(a):
    clf = pickle.load(open('../models/stress_ecg_final.p','rb'))
    a_ecg = deepcopy(a[['window','ecg_features']].dropna())
    feature_matrix = np.array(list(a_ecg['ecg_features']))
    if len(feature_matrix)<60:
        a['stress_likelihood_ecg'] = np.nan
        return a
    rr_70th = np.percentile(feature_matrix[:,2],70)
    rr_95th = np.percentile(feature_matrix[:,2],99)
    index = np.where((feature_matrix[:,2]>rr_70th)&(feature_matrix[:,2]<rr_95th))[0]
    means = np.mean(feature_matrix[index],axis=0)
    stds = np.std(feature_matrix[index],axis=0)
    print(means,stds,a['user'].values[0])
    feature_matrix = (feature_matrix - means)/stds
    probs = clf.predict_proba(feature_matrix)[:,1]
    a_ecg['stress_likelihood_ecg'] = probs
    a_ecg = a_ecg.drop(['ecg_features'],axis=1)
    print(a_ecg.dropna().shape)
    a = pd.merge(a, a_ecg, how='left', left_on=['window'], right_on=['window'])
    return a

def get_all_data(data,hand='left'):
    ema = data[['user','day','window','time','ltime','all_scores','score','label']]
    data = data.drop(['all_scores','score','label'],axis=1)
    data_all = get_daywise(data)
    if len(data_all)==0:
        return pd.DataFrame([],columns=['c']), pd.DataFrame([],columns=['c'])
    final_output = Parallel(n_jobs=25,verbose=4)(delayed(parse_each_day_ppg_ecg)(a) for a in data_all)
#     final_output = [parse_each_day_ppg_ecg(a) for a in data_all]
    final_output = [a for a in final_output if a.shape[0]>0]
    if len(final_output)==0:
        return pd.DataFrame([],columns=['c']), pd.DataFrame([],columns=['c'])
    final_output = pd.concat(final_output)
    final_output = get_ecg_stress(final_output)
    final_output = get_ppg_stress(final_output)
    final_output['stress_likelihood_ppg_qual'] = final_output['stress_likelihood_ppg']
    final_output['hand'] = hand
    return final_output,ema

def parse_each_participant(directory_left,directory_right,d):
    left_data,ema_left = get_all_data(pickle.load(open(directory_left+d,'rb')).reset_index(drop=True),'left')
    right_data,ema_right = get_all_data(pickle.load(open(directory_right+d,'rb')).reset_index(drop=True),'right')
    data = pd.concat([a for a in [left_data,right_data] if a.shape[0]>0])
    print(left_data.shape,right_data.shape,data.shape,left_data.columns)
    if data.shape[0]>0:
        pickle.dump([data,ema_left,ema_right],open(directory1+d,'wb'))
        print('saved','-'*30)
    return 0

directory_left = '../../cc3/rice_data/ecg_ppg_5_left_final/'
directory_right = '../../cc3/rice_data/ecg_ppg_5_right_final/'
directory1 = '../../cc3/rice_data/after_computation/ecg_ppg_final_total_5/'
import os
if not os.path.isdir(directory1):
    os.makedirs(directory1)
# all_data = Parallel(n_jobs=40,verbose=2)(delayed(parse_each_participant)(directory_left,directory_right,d) for d in np.intersect1d(os.listdir(directory_left),os.listdir(directory_right)) if d[-1]=='p')
all_data = [parse_each_participant(directory_left,directory_right,d) for d in np.intersect1d(os.listdir(directory_left),os.listdir(directory_right))[::-1] if d[-1]=='p']

[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   5 out of  14 | elapsed:   37.2s remaining:  1.1min
[Parallel(n_jobs=25)]: Done   9 out of  14 | elapsed:   42.0s remaining:   23.3s
[Parallel(n_jobs=25)]: Done  14 out of  14 | elapsed:   55.6s finished


[1.02007684e+04 7.57805135e+01 8.27580289e+02 8.17039141e+02
 8.69992088e+02 7.73150168e+02 7.35758230e+01 6.20705434e-02
 2.43335611e-01 5.31352817e-01 3.81919420e-01] [1.75243688e+04 4.34598600e+01 2.65510720e+01 3.48250429e+01
 4.45963774e+01 3.88542048e+01 3.29728399e+00 6.30927279e-02
 1.39978053e-01 1.12791213e-01 3.08609131e-01] fdddb3bd-bb88-458f-bcc8-e50bb3f87742
(4096, 2)


[Parallel(n_jobs=25)]: Using backend LokyBackend with 25 concurrent workers.
[Parallel(n_jobs=25)]: Done   3 out of  12 | elapsed:   23.1s remaining:  1.2min
[Parallel(n_jobs=25)]: Done   7 out of  12 | elapsed:   38.3s remaining:   27.4s


KeyboardInterrupt: 

Exception in thread QueueManagerThread:
Traceback (most recent call last):
  File "/usr/lib64/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/lib64/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/cerebralcortex/kessel_jupyter_virtualenv/cc3_high_performance/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 747, in _queue_management_worker
    recursive_terminate(p)
  File "/cerebralcortex/kessel_jupyter_virtualenv/cc3_high_performance/lib/python3.6/site-packages/joblib/externals/loky/backend/utils.py", line 28, in recursive_terminate
    _recursive_terminate_without_psutil(process)
  File "/cerebralcortex/kessel_jupyter_virtualenv/cc3_high_performance/lib/python3.6/site-packages/joblib/externals/loky/backend/utils.py", line 53, in _recursive_terminate_without_psutil
    _recursive_terminate(process.pid)
  File "/cerebralcortex/kessel_jupyter_virtualenv/cc3_high_performance/lib

In [9]:
import os
import pickle
import numpy as np
import pandas as pd
from pandas.core.window import _flex_binary_moment, _Rolling_and_Expanding
import matplotlib.pyplot as plt

def weighted_mean(self, weights, **kwargs):
    weights = self._shallow_copy(weights)
    window = self._get_window(weights)

    def _get_weighted_mean(X, Y):
        X = X.astype('float64')
        Y = Y.astype('float64')
        sum_f = lambda x: x.rolling(window, self.min_periods, center=self.center).sum(**kwargs)
        return sum_f(X * Y) / sum_f(Y)

    return _flex_binary_moment(self._selected_obj, weights._selected_obj,
                               _get_weighted_mean, pairwise=True)

def dump_data(directory_ema,directory1,directory2,f):
    if f not in os.listdir(directory_ema):
        return 0
    data = pickle.load(open(directory1+f,'rb'))
    a = data[0]
    this_participant = []
    stress_all = a[['time','ltime','user','stress_likelihood_ppg','quality_mag','day','activity','hand','rr_weighted_features']].dropna()
    stress_all['hr'] = stress_all['rr_weighted_features'].apply(lambda a:a[-5])
    stress_days = [a for i,a in stress_all.groupby(['hand','day'],as_index=False) if a.shape[0]>180]
    _Rolling_and_Expanding.weighted_mean = weighted_mean
    for a in stress_days:
        a = a.sort_values('time').reset_index(drop=True)
        a['stress_likelihood_ppg_qual'] = a['stress_likelihood_ppg'].rolling(window = 7).weighted_mean(a['quality_mag'])
        a['qual'] = a['quality_mag'].rolling(window = 7).weighted_mean(a['quality_mag'])
        a['hr'] = a['hr'].rolling(window = 7).weighted_mean(a['quality_mag'])
        this_participant.append(a)
    data = pd.concat(this_participant)
    ema = pickle.load(open(directory_ema+f,'rb'))
#     print(ema.columns)
    ema = ema.sort_values('score').reset_index(drop=True)
    ema['label'][ema['score']<=np.mean(ema['score'])] = 0
    ema['label'][ema['score']>np.mean(ema['score'])] = 1
#     ema['happy'] = ema['all_scores'].apply(lambda a:a[0])
#     ema['joyful'] = ema['all_scores'].apply(lambda a:a[1])
#     ema['nervous'] = ema['all_scores'].apply(lambda a:a[2])
#     ema['sad'] = ema['all_scores'].apply(lambda a:a[3])
#     ema['angry'] = ema['all_scores'].apply(lambda a:a[4])
#     labels = ema[['happy','joyful','nervous','sad','angry']].values
#     for j in range(labels.shape[1]):
#         t = labels[:,j]
#         t[t<=np.mean(t)] = 0
#         t[t!=0] = 1
#         labels[:,j] = t
#     labels = np.sum(labels,axis=1)
#     labels[labels<=2] = 0
#     labels[labels!=0] = 1
#     ema['label'] = labels
#     ind_max = np.argmax(diff)
#     if ind_max<3:
#         continue
#     ema['label'] = [0]*(ind_max-1) + [1]*(ema.shape[0]-ind_max+1)
#     plt.figure(figsize=(16,8))
# #     plt.hist(data['stress_likelihood_ecg'])
#     plt.hist(data['stress_likelihood_ppg_qual'])
#     plt.show()
#     plt.figure(figsize=(18,10))
#     plt.hist(data['stress_likelihood_ppg_qual'],50)
#     plt.show()
    pickle.dump([data,ema],open(directory2+f,'wb'))
    return 0

directory1 = '../../cc3/rice_data/after_computation/ecg_ppg_final_total_5/'
directory2 = '../../cc3/rice_data/after_ema_parsing/ecg_ppg_final_weighted_total_5/'
if not os.path.isdir(directory2):
    os.makedirs(directory2)
directory_ema = '../../cc3/rice_data/ecg_ppg_ema_final/'
from joblib import Parallel,delayed
output = Parallel(n_jobs=30,verbose=3)(delayed(dump_data)(directory_ema,directory1,directory2,f) for f in os.listdir(directory1) if f[-1]=='p')    
# output = [dump_data(directory_ema,directory1,directory2,f) for f in os.listdir(directory1) if f[-1]=='p']   

#         plt.figure(figsize=(18,10))
#         plt.plot(a['time'],a['stress_likelihood_ppg_qual'],'*-r')
#         plt.plot(a['time'],a['hr']/np.max(a['hr']),'o-c')
#         plt.title(np.max(a['hr']))
#         plt.bar(a['time'],a['qual'],50)
#         plt.show()

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done  67 out of  94 | elapsed:    6.2s remaining:    2.5s
[Parallel(n_jobs=30)]: Done  94 out of  94 | elapsed:    9.3s finished


In [10]:
import os
import pickle
import numpy as np
import pandas as pd
from pandas.core.window import _flex_binary_moment, _Rolling_and_Expanding
import matplotlib.pyplot as plt
directory2 = '../../cc3/rice_data/after_ema_parsing/ecg_ppg_final_weighted_total_5/'
count = 0
df_col = []
ema_day = []
for f in os.listdir(directory2):
    if f[-1]!='p':
        continue
    data,ema = pickle.load(open(directory2+f,'rb'))
    print(ema['label'].unique())
    for day in ema['day'].unique():
        ema_day.append(ema[ema.day==day])
        if ema_day[-1].shape[0]<1:
            ema_day = ema_day[:-1]
            continue
        if data[data.day==day]['stress_likelihood_ppg_qual'].dropna().shape[0]<120:
            ema_day = ema_day[:-1]
            continue
        df_col.append(data[data.day==day])
#         count+=ema_day[-1].shape[0]


[0 1]
[0 1]
[0 1]
[0]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0]
[0 1]
[0 1]
[0]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0 1]
[0]
[0 1]
[0 1]


In [11]:
from joblib import Parallel,delayed
import numpy as np
def get_data_data(duration,df_col,ema_day):
    all_data = []
    all_emas = []
    all_users = []
    hands = []
    for i in range(len(df_col)):
        data = df_col[i].sort_values('time').reset_index(drop=True)
        ema = ema_day[i].sort_values('time').reset_index(drop=True)
        for hand in ['left','right']:
            for j,row in ema.iterrows():
                temp_data = data[(data.time>=row['time']-duration*60) & (data.time<row['time']) & (data.hand==hand)].sort_values('time').reset_index(drop=True)
                if temp_data.shape[0]>duration/3 and temp_data['time'].values[-1]-temp_data['time'].values[0]>duration*60/3:
                    all_data.append(temp_data)
                    all_emas.append(row)
                    all_users.append(row['user'])
                    hands.append(hand)
    return [duration,all_data,all_emas,all_users,hands]

data_data = Parallel(n_jobs=30,verbose=3)(delayed(get_data_data)(duration,df_col,ema_day) for duration in np.arange(5,225,10))
pickle.dump(data_data,open('../data/data_emas_all_duration_total.p','wb'))
    

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   3 out of  22 | elapsed:   33.0s remaining:  3.5min
[Parallel(n_jobs=30)]: Done  11 out of  22 | elapsed:  1.6min remaining:  1.6min
[Parallel(n_jobs=30)]: Done  19 out of  22 | elapsed:  2.7min remaining:   25.7s
[Parallel(n_jobs=30)]: Done  22 out of  22 | elapsed:  3.2min finished


In [12]:
import numpy as np
import pylab as pb
import GPy 
%pylab inline
def get_predictions(X,Y,error):
    X = (X - np.mean(X))/np.std(X)
    mm = np.mean(Y)
    ss = np.std(Y)
    Y = (Y-np.mean(Y))/ss
    kern =  GPy.kern.RBF(input_dim=1) + GPy.kern.MLP(1) 
    Y_meta = {'output_index':np.arange(len(Y))[:,None]}
    m = GPy.models.GPHeteroscedasticRegression(X[:,None],Y[:,None],kern,Y_metadata=Y_meta)
    m['.*het_Gauss.variance'] = np.abs(error)[:,None] #Set the noise parameters to the error in Y
    m.het_Gauss.variance.fix() #We can fix the noise term, since we already know it
    m.optimize()
    preds,varss  = m.predict(m.X,full_cov=False,Y_metadata=None,kern=None,likelihood=None,include_likelihood=False)
    return preds*ss+mm,varss
len(df_col)

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy [pylab.py:160]


554

In [13]:
from copy import deepcopy
def save_pdf(data,ema,i):
    for hand in ['left','right']:
        g= deepcopy(data[data.hand==hand]).dropna()
#         g['hour'] = g['ltime'].apply(lambda a:int(a.hour))
#         print(g.hour.values)
#         g = g[(g.hour>=6) & (g.hour<20)]
        if g.shape[0]<100:
            continue
        g['ltime_str'] = g['ltime'].apply(lambda a:a.strftime("%H:%M"))
        preds,varss = get_predictions(g['time'].values,g['stress_likelihood_ppg_qual'].values,(1.1-g['quality_mag'].values)/2)
        plt.figure(figsize=(19,10))
        plt.title(g['user'].values[0] +'--'+ g['day'].values[0]+'--'+g['hand'].values[0]+' wrist')
        plt.plot((g['time']-np.min(g['time']))/60,g['stress_likelihood_ppg_qual'],'*-r',linewidth=1,label='Stress Likelihood')
        plt.plot((g['time']-np.min(g['time']))/60,preds,'*-k',linewidth=3,label='Stress Likelihood Post Processed by GP')
#         plt.plot((g['time']-np.min(g['time']))/60,g['hr']/60,'s-g',linewidth=3,label='Normalized Heart Rate w.r.t. 60 BPM')
        #         plt.plot((g['time']-np.min(g['time']))/60,g['stress_likelihood_ppg'],'y',linestyle='--')
        plt.bar((g['time']-np.min(g['time']))/60,g['quality_mag'],4,alpha=1,label='Minute Level Quality Metric')
        #         for j,row in ema.iterrows():
        #             print(row['label'])
        #             if np.int64(row['label'])==1:
        row = ema[ema.label==1]
        if row.shape[0]>0:
            plt.bar((row['time']-np.min(g['time']))/60,[1]*row.shape[0],10,alpha=1,label='Stress EMA',color='darkred')
        #             else:
        row = ema[ema.label==0]
        if row.shape[0]>0:
            plt.bar((row['time']-np.min(g['time']))/60,[1]*row.shape[0],10,alpha=1,label='Not Stress EMA',color='darkgreen')
        plt.legend(ncol=5)
        plt.xticks(np.array((g['time']-np.min(g['time']))/60)[np.arange(g.shape[0])%10==0],g['ltime_str'][np.arange(g.shape[0])%10==0],rotation=60)
        plt.ylim([0,1.3])
        plt.xlabel('Time of Day')
    #     plt.ylabel('Likelihood Values')
        plt.savefig('../pics_day/'+g['user'].values[0] +'--'+ g['day'].values[0]+'--'+g['hand'].values[0]+' wrist'+'.pdf',dps=1e6)
        plt.close('all')
    return 0
from joblib import Parallel,delayed
ouput = Parallel(n_jobs=-1,verbose=2)(delayed(save_pdf)(df_col[i].sort_values('time').reset_index(drop=True),
                                                        ema_day[i].sort_values('time').reset_index(drop=True),
                                                       i) for i in range(len(df_col)))
# ouput = [save_pdf(df_col[i].sort_values('time').reset_index(drop=True),
#                                                         ema_day[i].sort_values('time').reset_index(drop=True),
#                                                        i) for i in range(len(df_col))]
    #         plt.bar((g['time']-np.min(g['time']))/60,g['activity_f'],.2,alpha=.5,color='r')
#     plt.show()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.
[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed:   41.2s
[Parallel(n_jobs=-1)]: Done 269 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 554 out of 554 | elapsed:  6.8min finished


In [14]:
from PyPDF2 import PdfFileMerger, PdfFileReader
import os
filenames = os.listdir('../pics_day/')
filenames = sorted([a for a in filenames if a[-1]=='f'])

In [15]:
merger = PdfFileMerger()
for filename in filenames:
    merger.append(PdfFileReader(open('../pics_day/'+filename, 'rb')))

merger.write("result-output_total_normalization.pdf")