In [1]:
import numpy as np
import pandas as pd
import scipy.stats

pd.options.mode.chained_assignment = None
from matplotlib import pyplot as plt, rcParams
# import cv2
import seaborn as sns

sns.set(style="white", context="paper")
from cycler import cycler
import os, sys
import glob
from datetime import datetime, timedelta
from itertools import combinations
import base64
from PIL import Image
from io import BytesIO as _BytesIO
import requests
import json
import pickle
from datetime import datetime
from IPython.display import display, Markdown, Latex
from sklearn.metrics import *
import collections
from copy import deepcopy
pd.options.display.max_columns = None
def printm(s): return display(Markdown(s))


In [2]:
def get_weekly_stress_score(week_context_attributes):
    """
    Return stress score for the week
    :param week_context_attributes: How days context looks like
    :return: weekly stress score based on FSM
    """
    stress_score = 0.
    total_days_data_in_week = len(week_context_attributes.keys())

    # Condition 1: Context of exercising and housework existing in 57% of week: -1 stress score
    c1_exercise_count, c1_housework_count = 0.,0.
    for day in week_context_attributes.keys():
        if 'Exercising' in week_context_attributes[day].context.values:
            c1_exercise_count +=1
        if 'HouseWork' in week_context_attributes[day].context.values:
            c1_housework_count+=1

    if ((c1_exercise_count/total_days_data_in_week) >= 0.57) and ((c1_housework_count/total_days_data_in_week) >= 0.57):
        stress_score -= 1

    # Condition 2a: No context of exercising for more than 57% week(4 days if full week available): +0.5 stress score
    # Condition 2b: No context of housework  for more than 4 days(57% week): +0.5 stress score
    c2a_exercise_count = c1_exercise_count
    c2b_housework_count = c1_housework_count

    if (c2a_exercise_count/total_days_data_in_week) < 0.57:
        stress_score += 0.5
    if (c2b_housework_count/total_days_data_in_week) < 0.57:
        stress_score += 0.5

    # Condition 3: context of commute for more than an hour(on average) in a day everyday: +0.5 stress score
    c3_commute_hours = []
    for day in week_context_attributes:
        if 'Commuting' not in week_context_attributes[day].context.values:
            c3_commute_hours.append(0.)
        else:
            day_commute_values = week_context_attributes[day][week_context_attributes[day].context=='Commuting']
            day_commute_values['period'] = day_commute_values['end'] - day_commute_values['start']
            c3_commute_hours.append(day_commute_values['period'].sum())

    if np.mean(c3_commute_hours) > (60*60): # more than 1 hours of average commute
        stress_score +=0.5


    # condition 4a: context of sleep for less than 6 hours on more than 3 days(42% week): +1 stress score
    # condition 4b: context of sleep for less more than 8 hours on an average: -0.5 stress score
    c4_sleep_hours = []
    for day in week_context_attributes:
        if 'Sleeping' not in week_context_attributes[day].context.values:
            c4_sleep_hours.append(0.)
        else:
            day_sleep_values = week_context_attributes[day][week_context_attributes[day].context == 'Sleeping']
            day_sleep_values['period'] = day_sleep_values['end'] - day_sleep_values['start']
            c4_sleep_hours.append(day_sleep_values['period'].sum())

    if np.max(sorted(c4_sleep_hours)[:3]) < (6 * 60 * 60):  # 4a.less than 6 hours of sleep for 3 days
        stress_score += 1

    if np.mean(c4_sleep_hours) > (8 * 60 * 60):  # 4b. more than 8 hours of sleep
        stress_score -= 0.5

    # condition 5: (More than 4 days(57%) of Exercising context for more than half an hour) &
    # (Different working hours (>40% difference) across days): +0.5 stress score
    c5_exercising_count = c1_exercise_count
    c5_exercising_hours = []
    for day in week_context_attributes:
        if 'Exercising' in week_context_attributes[day].context.values:
            day_exercise_values = week_context_attributes[day][week_context_attributes[day].context == 'Exercising']
            day_exercise_values['period'] = day_exercise_values['end'] - day_exercise_values['start']
            c5_exercising_hours.append(day_exercise_values['period'].sum())
    c5_exercise_cond = False
    if ((c5_exercising_count/total_days_data_in_week) >= 0.57) and (np.min(c5_exercising_hours) > (0.5*60*60)):
        c5_exercise_cond = True

    c5_working_periods = {}
    for day in week_context_attributes:
        if 'OfficeWork' in week_context_attributes[day].context.values:
            day_work_values = week_context_attributes[day][week_context_attributes[day].context == 'OfficeWork']
            day_work_values['start_hour'] = pd.to_datetime(day_work_values['start'],unit='s').dt.hour
            day_work_values['end_hour'] = pd.to_datetime(day_work_values['end'], unit='s').dt.hour
            c5_working_periods[day] = np.zeros(24)
            for idx, row in day_work_values.iterrows():
                c5_working_periods[day][row['start_hour']:row['end_hour']+1] = 1

    c5_hour_differences= [0.]
    for dayA in c5_working_periods.keys():
        for dayB in c5_working_periods.keys():
            if not (dayA==dayB):
                working_hoursA = c5_working_periods[dayA]
                working_hoursB = c5_working_periods[dayB]
                diff_hours = np.sum(working_hoursA!=working_hoursB)
                same_hours = np.sum(np.logical_and(working_hoursA==working_hoursB,working_hoursA))
                if same_hours > 0.:
                    c5_hour_differences.append(diff_hours / same_hours)
                else:
                    c5_hour_differences.append(0.)

    c5_working_cond = False
    if np.max(c5_hour_differences) > 0.4:
        c5_working_cond = True

    if (c5_working_cond) & (c5_exercise_cond):
        stress_score +=0.5

    # condition 6: (No context of exercising) & (Different working hours across days): +1 stress score
    c6_exercise_count=  c1_exercise_count
    c6_working_hour_diff = c5_hour_differences
    if (c6_exercise_count==0.) & (np.mean(c5_hour_differences) > 0.4):
        stress_score += 1

    # condition 7: (More than 4 days of Exercising context for more than half an hour) &
    # (No Different working hours (>40% difference)  across days): -1 stress score
    c7_exercise_cond = c5_exercise_cond
    c7_hour_differences = c5_hour_differences

    if (c7_exercise_cond) & (np.max(c7_hour_differences) < 0.4):
        stress_score -= 1.

    # condition 8: (More than 10 hrs of works in any day) OR (50+ hours per week): +1 stress score
    c8_work_hours = []
    for day in week_context_attributes:
        if 'OfficeWork' in week_context_attributes[day].context.values:
            day_work_values = week_context_attributes[day][week_context_attributes[day].context == 'OfficeWork']
            day_work_values['period'] = day_work_values['end'] - day_work_values['start']
            c8_work_hours.append(day_work_values['period'].sum())
        else:
            c8_work_hours.append(0.)

    if (np.max(c8_work_hours) > (10 * 60 * 60)) or (np.sum(c8_work_hours) > (50 * 60 * 60)):
        stress_score += 1

    # condition 9a: (More than 40% shift in working hours in max gap): +1 stress score
    # condition 9b: (More than 40% shift in working hours in 2 or more consecutive days): +1 stress score
    # Not modeling due to over complexity

    # condition 10a: More than 15 mins exercising every day: -0.5 stress score
    # condition 10b: More than 1 hour housework every day: -0.5 stress score
    c10a_exercising_hours = []
    for day in week_context_attributes:
        if 'Exercising' in week_context_attributes[day].context.values:
            day_exercise_values = week_context_attributes[day][week_context_attributes[day].context == 'Exercising']
            day_exercise_values['period'] = day_exercise_values['end'] - day_exercise_values['start']
            c10a_exercising_hours.append(day_exercise_values['period'].sum())
        else:
            c10a_exercising_hours.append(0.)

    c10b_housework_hours = []
    for day in week_context_attributes:
        if 'HouseWork' not in week_context_attributes[day].context.values:
            c10b_housework_hours.append(0.)
        else:
            day_housework = week_context_attributes[day][week_context_attributes[day].context == 'HouseWork']
            day_housework['period'] = day_housework['end'] - day_housework['start']
            c10b_housework_hours.append(day_housework['period'].sum())

    if np.min(c10a_exercising_hours) > 15 * 60:
        stress_score -=0.5

    if np.min(c10b_housework_hours) > 60 * 60:
        stress_score -= 0.5



    # condition 11a: Continuous 30 mins exercise, five times a week: -1 stress score
    # condition 11b: 15 mins exercise twice, five times a week: -1 stress score
    # condition 11c: 10 mins exercise thrice, five times a week: -1 stress score

    c11_day_cond = []
    for day in week_context_attributes.keys():
        if 'Exercising' not in week_context_attributes[day].context.values:
            c11_day_cond.append(False)
        else:
            day_exercise_values = week_context_attributes[day][week_context_attributes[day].context == 'OfficeWork']
            day_exercise_values['period'] = day_exercise_values['end'] - day_exercise_values['start']
            day_num_exercises = day_exercise_values.shape[0]
            day_min_continuous_exercise = day_exercise_values['period'].min()
            if (day_num_exercises >=1) and (day_min_continuous_exercise > 30*60):
                c11_day_cond.append(True)
            elif (day_num_exercises >=2) and (day_min_continuous_exercise > 15*60):
                c11_day_cond.append(True)
            elif (day_num_exercises >=3) and (day_min_continuous_exercise > 10*60):
                c11_day_cond.append(True)
            else:
                c11_day_cond.append(False)

    if (np.sum(c11_day_cond)/len(c11_day_cond)) > 0.7: # 5+ days in a week
        stress_score -= 1

    # condition 12: Working hours more than 8 for more than 5 days: +1 stress score
    c12_work_hours = c8_work_hours

    if np.percentile(c12_work_hours,0.7) > (8 * 60 * 60):
        stress_score += 1

    # condition 13a: Inactivity/Amusement for more than 2 hours everyday: -0.5 stress score
    # condition 13b: Inactivity/Amusement/Housework for more than 5 hours atleast two days: -0.5 stress score
    c13a_inactivity_amusement_hours = []
    for day in week_context_attributes:
        day_13a_hours = week_context_attributes[day][week_context_attributes[day].context.isin(['Inactivity','Amusement'])]
        if day_13a_hours.shape[0] > 0.:
            c13a_inactivity_amusement_hours.append((day_13a_hours['end'] - day_13a_hours['start']).sum())
        else:
            c13a_inactivity_amusement_hours.append(0.)

    c13b_inactivity_amusement_housework_hours = []
    for day in week_context_attributes:
        day_13b_hours = week_context_attributes[day][week_context_attributes[day].context.isin(['Inactivity','Amusement','HouseWork'])]
        if day_13b_hours.shape[0] > 0.:
            c13b_inactivity_amusement_housework_hours.append((day_13b_hours['end'] - day_13b_hours['start']).sum())
        else:
            c13b_inactivity_amusement_housework_hours.append(0.)

    if np.min(c13a_inactivity_amusement_hours) > (2 * 60 * 60):
        stress_score -= 0.5
    if np.percentile(c13b_inactivity_amusement_housework_hours, 30) > (5 * 60 * 60):
        stress_score -= 0.5

    # condition 14a: Working hours between 12am and 8am for any day: +1 stress score
    # condition 14b: Working hours between 12am and 8am for more than 4 days: +1 stress score

    c14_working_periods = c5_working_periods
    c14_night_work = []
    for day in c14_working_periods.keys():
        if np.sum(c14_working_periods[day][:8]) > 1:
            c14_night_work.append(True)
        else:
            c14_night_work.append(False)

    if np.sum(c14_night_work) >=1:
        stress_score +=1
    if np.sum(c14_night_work) >=4:
        stress_score += 1

    return stress_score


In [3]:
df_onto_casas = pd.read_csv("../../cache/trace_results/extrasensory_onto_trace.csv")
df_onto_casas['tao_prediction'] = df_onto_casas['tao_prediction'].apply(lambda x: x.split(";") if not (str(x)=='nan') else ['Unknown'])
df_onto_casas.head()

Unnamed: 0,timestamp,activities,id,tao_prediction
0,1444079161,"Sitting,In a meeting,With co-workers",00EABED2-271D-49D8-B599-1D4A09240601,[Unknown]
1,1444079221,"Sitting,In a meeting,With co-workers",00EABED2-271D-49D8-B599-1D4A09240601,[Unknown]
2,1444079281,"Sitting,In a meeting,With co-workers",00EABED2-271D-49D8-B599-1D4A09240601,[Unknown]
3,1444079341,"Sitting,In a meeting,With co-workers",00EABED2-271D-49D8-B599-1D4A09240601,[Unknown]
4,1444079431,"Sitting,In a meeting,With co-workers",00EABED2-271D-49D8-B599-1D4A09240601,[Unknown]


In [4]:
ts_dict = pickle.load(open("../../cache/trace_results/ts_results_extra.pb","rb"))
ts_results, cluster_labels = ts_dict['ts_results'], ts_dict['clusters']

In [30]:
list(ts_results.keys())

['00EABED2-271D-49D8-B599-1D4A09240601',
 '098A72A5-E3E5-4F54-A152-BBDA0DF7B694',
 '0A986513-7828-4D53-AA1F-E02D6DF9561B',
 '0BFC35E2-4817-4865-BFA7-764742302A2D',
 '0E6184E1-90C0-48EE-B25A-F1ECB7B9714E',
 '1155FF54-63D3-4AB2-9863-8385D0BD0A13',
 '11B5EC4D-4133-4289-B475-4E737182A406',
 '136562B6-95B2-483D-88DC-065F28409FD2',
 '1538C99F-BA1E-4EFB-A949-6C7C47701B20',
 '1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842',
 '24E40C4C-A349-4F9F-93AB-01D00FB994AF',
 '27E04243-B138-4F40-A164-F40B60165CF3',
 '2C32C23E-E30C-498A-8DD2-0EFB9150A02E',
 '33A85C34-CFE4-4732-9E73-0A7AC861B27A',
 '3600D531-0C55-44A7-AE95-A7A38519464E',
 '40E170A7-607B-4578-AF04-F021C3B0384A',
 '481F4DD2-7689-43B9-A2AA-C8772227162B',
 '4E98F91F-4654-42EF-B908-A3389443F2E7',
 '4FC32141-E888-4BFF-8804-12559A491D8C',
 '5119D0F8-FCA8-4184-A4EB-19421A40DE0D',
 '5152A2DF-FAF3-4BA8-9CA9-E66B32671A53',
 '59818CD2-24D7-4D32-B133-24C2FE3801E5',
 '59EEFAE0-DEB0-4FFF-9250-54D2A03D0CF2',
 '5EF64122-B513-46AE-BCF1-E62AAC285D2C',
 '61359772-D8D8-

In [79]:
wellness_user_inputs = {}

In [7]:

id = '00EABED2-271D-49D8-B599-1D4A09240601'

In [80]:
for id in ts_results.keys():
    print(f"Start id {id}")
    df_ts_input =ts_results[id]
    df_ts_input.info()
    pred_min_ts, pred_max_ts = df_ts_input.start_timestamp.min(), df_ts_input.end_timestamp.max()
    df_pred_ts = pd.DataFrame(np.arange(pred_min_ts, pred_max_ts + 1), columns=['timestamp'])
    df_pred_ts['pred_context'] = None
    df_pred_ts = df_pred_ts.set_index('timestamp')
    for row_idx, row in df_ts_input.iterrows():
        df_pred_ts.loc[row['start_timestamp']:row['end_timestamp'], 'pred_context'] = df_pred_ts.loc[
                                                                                      row['start_timestamp']:
                                                                                      row['end_timestamp'],
                                                                                      'pred_context'].apply(
            lambda x: (x + cluster_labels[int(row['cluster_id'])]) if (x is not None) else cluster_labels[
                int(row['cluster_id'])])
    df_pred_ts = df_pred_ts.reset_index().rename(columns={'pred_context':'context'})
    df_pred_ts = df_pred_ts[~df_pred_ts.context.isnull()]
    df_pred_ts.head()
    df_onto_ts = df_onto_casas[df_onto_casas.id==id][['timestamp','tao_prediction']].rename(columns={'tao_prediction':'context'})
    df_onto_ts = df_onto_ts[~df_onto_ts.context.isnull()]
    df_onto_ts.head()
    df_combined_ts = pd.merge(df_onto_ts, df_pred_ts, on=['timestamp'], suffixes=('_onto','_tp'))
    df_combined_ts.head()
    df_combined_ts['context'] = df_combined_ts.apply(lambda row: ",".join(row['context_tp']+row['context_onto']),axis=1)
    df_combined_ts = df_combined_ts[['timestamp','context']]
    all_contexts = ['Exercising','HouseWork','Commuting','Sleeping','OfficeWork', 'Relaxing','Amusement','HavingMeal','UsingBathroom']
    for context in all_contexts:
        df_combined_ts[context] = df_combined_ts['context'].apply(lambda x: context in x)
    df_combined_ts = df_combined_ts.drop('context',axis=1)
    df_combined_ts = pd.melt(df_combined_ts, id_vars='timestamp',var_name='context',value_name='isPresent')
    df_combined_ts = df_combined_ts[df_combined_ts.isPresent==True][['timestamp','context']]
    wellness_user_inputs[id] =df_combined_ts
    print(f"Done id {id}")

Start id 00EABED2-271D-49D8-B599-1D4A09240601
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2081 entries, 0 to 2080
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   start_timestamp       2081 non-null   float64
 1   next_start_timestamp  2081 non-null   float64
 2   end_timestamp         2081 non-null   float64
 3   cluster_id            2081 non-null   int64  
dtypes: float64(3), int64(1)
memory usage: 65.2 KB
Done id 00EABED2-271D-49D8-B599-1D4A09240601
Start id 098A72A5-E3E5-4F54-A152-BBDA0DF7B694
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5423 entries, 0 to 5422
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   start_timestamp       5423 non-null   float64
 1   next_start_timestamp  5423 non-null   float64
 2   end_timestamp         5423 non-null   float64
 3   cluster_id            5423 non-nul

KeyboardInterrupt: 

In [81]:

len(wellness_user_inputs.keys())

34

Unnamed: 0,timestamp,context
0,1444079161,[Unknown]
1,1444079221,[Unknown]
2,1444079281,[Unknown]
3,1444079341,[Unknown]
4,1444079431,[Unknown]


Done id 00EABED2-271D-49D8-B599-1D4A09240601


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40442 entries, 0 to 40441
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   start_timestamp       40442 non-null  float64
 1   next_start_timestamp  40442 non-null  float64
 2   end_timestamp         40442 non-null  float64
 3   cluster_id            40442 non-null  int64  
dtypes: float64(3), int64(1)
memory usage: 1.2 MB


Unnamed: 0_level_0,pred_context,context
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
1342781000.0,,[Sleeping]
1342781000.0,,[Sleeping]
1342781000.0,,[Sleeping]
1342781000.0,,[Sleeping]
1342781000.0,,[Sleeping]


In [32]:

df_combined_ts.head()

Unnamed: 0,timestamp,context
196866,1308130861,HouseWork
196867,1308130864,HouseWork
196868,1308130883,HouseWork
196869,1308130890,HouseWork
196870,1308130914,HouseWork


In [33]:
df_wellness_input = df_combined_ts

In [72]:
user_wellness_scores = {}
for id in wellness_user_inputs.keys():
    df_wellness_input = wellness_user_inputs[id].copy(deep=True)
    df_wellness_input['datetime'] = pd.to_datetime(df_wellness_input['timestamp'], unit='s')
    df_wellness_input['week'] = df_wellness_input['datetime'].apply(lambda x: x.strftime("%Y_%V"))
    df_wellness_input['day'] = df_wellness_input['datetime'].apply(lambda x: x.strftime("%Y-%m-%d"))

    user_week_context_attributes = []
    for week in df_wellness_input['week'].unique():
        df_week_wellness_input = df_wellness_input[df_wellness_input.week == week]
        week_context_attributes = {}
        for day in df_week_wellness_input['day'].unique():
            df_day_wellness_input = df_week_wellness_input[df_week_wellness_input.day == day]
            df_day_wellness_input['context_grp'] = (
                    df_day_wellness_input['context'] != df_day_wellness_input['context'].shift(1)).cumsum()
            day_contexts = df_day_wellness_input.groupby(['context_grp', 'context'], as_index=False).agg({
                'timestamp': ['min', 'max', lambda x: x.max() - x.min()]
            })
            day_contexts.columns = ['group', 'context', 'start', 'end', 'length']
            week_context_attributes[day] = day_contexts
        user_week_context_attributes.append([week, week_context_attributes])
        print(f'Done week {week}.')
    weekly_stress_scores = []
    for week, week_context_attributes in user_week_context_attributes:
        week_stress_score = get_weekly_stress_score(week_context_attributes)
        weekly_stress_scores.append((week, week_stress_score))
    user_wellness_scores[id] = weekly_stress_scores
    print(f"Done user {id}")
    # break

Done week 2015_41.
Done week 2015_42.
Done user 00EABED2-271D-49D8-B599-1D4A09240601
Done week 2015_32.
Done week 2015_34.
Done week 2015_35.
Done week 2015_36.
Done user 098A72A5-E3E5-4F54-A152-BBDA0DF7B694
Done week 2015_50.
Done user 0A986513-7828-4D53-AA1F-E02D6DF9561B
Done week 2015_43.
Done week 2015_44.
Done user 0BFC35E2-4817-4865-BFA7-764742302A2D
Done week 2015_49.
Done week 2015_50.
Done user 0E6184E1-90C0-48EE-B25A-F1ECB7B9714E
Done week 2016_21.
Done week 2016_22.
Done user 1155FF54-63D3-4AB2-9863-8385D0BD0A13
Done week 2015_34.
Done week 2015_35.
Done user 11B5EC4D-4133-4289-B475-4E737182A406
Done week 2015_34.
Done week 2015_35.
Done user 136562B6-95B2-483D-88DC-065F28409FD2
Done week 2015_43.
Done week 2015_44.
Done user 1538C99F-BA1E-4EFB-A949-6C7C47701B20
Done week 2015_35.
Done week 2015_36.
Done user 1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842
Done week 2015_43.
Done week 2015_44.
Done week 2015_45.
Done user 24E40C4C-A349-4F9F-93AB-01D00FB994AF
Done week 2015_48.
Done wee

In [73]:
user_wellness_scores

{'00EABED2-271D-49D8-B599-1D4A09240601': [('2015_41', 2.0), ('2015_42', 2.5)],
 '098A72A5-E3E5-4F54-A152-BBDA0DF7B694': [('2015_32', 3.0),
  ('2015_34', 0.0),
  ('2015_35', 1.5),
  ('2015_36', 2.5)],
 '0A986513-7828-4D53-AA1F-E02D6DF9561B': [('2015_50', 0.5)],
 '0BFC35E2-4817-4865-BFA7-764742302A2D': [('2015_43', 1.5), ('2015_44', 1.0)],
 '0E6184E1-90C0-48EE-B25A-F1ECB7B9714E': [('2015_49', 4.0), ('2015_50', 2.0)],
 '1155FF54-63D3-4AB2-9863-8385D0BD0A13': [('2016_21', 5.0), ('2016_22', 2.0)],
 '11B5EC4D-4133-4289-B475-4E737182A406': [('2015_34', 1.5), ('2015_35', -0.5)],
 '136562B6-95B2-483D-88DC-065F28409FD2': [('2015_34', 4.5), ('2015_35', 0.0)],
 '1538C99F-BA1E-4EFB-A949-6C7C47701B20': [('2015_43', 1.0), ('2015_44', 2.5)],
 '1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842': [('2015_35', 2.5), ('2015_36', 3.5)],
 '24E40C4C-A349-4F9F-93AB-01D00FB994AF': [('2015_43', 3.5),
  ('2015_44', 2.0),
  ('2015_45', 2.0)],
 '27E04243-B138-4F40-A164-F40B60165CF3': [('2015_48', 5.0), ('2015_49', 2.0)],
 '2C3

In [38]:
wellness_user_inputs.keys()

dict_keys(['00EABED2-271D-49D8-B599-1D4A09240601', '098A72A5-E3E5-4F54-A152-BBDA0DF7B694', '0A986513-7828-4D53-AA1F-E02D6DF9561B', '0BFC35E2-4817-4865-BFA7-764742302A2D', '0E6184E1-90C0-48EE-B25A-F1ECB7B9714E', '1155FF54-63D3-4AB2-9863-8385D0BD0A13', '11B5EC4D-4133-4289-B475-4E737182A406', '136562B6-95B2-483D-88DC-065F28409FD2', '1538C99F-BA1E-4EFB-A949-6C7C47701B20', '1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842', '24E40C4C-A349-4F9F-93AB-01D00FB994AF', '27E04243-B138-4F40-A164-F40B60165CF3', '2C32C23E-E30C-498A-8DD2-0EFB9150A02E', '33A85C34-CFE4-4732-9E73-0A7AC861B27A', '3600D531-0C55-44A7-AE95-A7A38519464E', '40E170A7-607B-4578-AF04-F021C3B0384A', '481F4DD2-7689-43B9-A2AA-C8772227162B', '4E98F91F-4654-42EF-B908-A3389443F2E7', '4FC32141-E888-4BFF-8804-12559A491D8C', '5119D0F8-FCA8-4184-A4EB-19421A40DE0D', '5152A2DF-FAF3-4BA8-9CA9-E66B32671A53', '59818CD2-24D7-4D32-B133-24C2FE3801E5', '59EEFAE0-DEB0-4FFF-9250-54D2A03D0CF2', '5EF64122-B513-46AE-BCF1-E62AAC285D2C', '61359772-D8D8-480D-B623-7C63

In [36]:
weekly_stress_scores = []
for week, week_context_attributes in user_week_context_attributes:
    week_stress_score = get_weekly_stress_score(user_week_context_attributes[0][1])
    weekly_stress_scores.append((week, week_stress_score))
weekly_stress_scores

[('2011_24', 1.0),
 ('2011_25', 1.0),
 ('2011_26', 1.0),
 ('2011_27', 1.0),
 ('2011_28', 1.0),
 ('2011_29', 1.0),
 ('2011_30', 1.0),
 ('2011_31', 1.0),
 ('2011_32', 1.0),
 ('2011_33', 1.0)]

In [12]:
user_week_context_attributes[0]

['2012_29',
 {'2012-07-20':     group        context       start         end  length
  0       1       GoingOut  1342780734  1342781454     720
  1       3  UsingBathroom  1342782560  1342782599      39
  2       5  UsingBathroom  1342782609  1342782609       0
  3       6      PhoneCall  1342785004  1342785340     336
  4       9  UsingBathroom  1342785645  1342785930     285
  5      12       GoingOut  1342786003  1342786018      15
  6      13       ComingIn  1342786433  1342786450      17
  7      14       Sleeping  1342786485  1342787744    1259
  8      16       Sleeping  1342787819  1342789329    1510
  9      17        Unknown  1342789491  1342789491       0
  10     18       GoingOut  1342789494  1342789509      15
  11     19       ComingIn  1342795232  1342795247      15
  12     22  UsingBathroom  1342795272  1342795407     135
  13     25       Sleeping  1342795479  1342796390     911
  14     26        Unknown  1342796446  1342796450       4
  15     27     HavingMeal  13

In [40]:
user_wellness_scores

{'00EABED2-271D-49D8-B599-1D4A09240601': [('2015_41', 2.0), ('2015_42', 2.0)],
 '098A72A5-E3E5-4F54-A152-BBDA0DF7B694': [('2015_32', 3.0),
  ('2015_34', 3.0),
  ('2015_35', 3.0),
  ('2015_36', 3.0)],
 '0A986513-7828-4D53-AA1F-E02D6DF9561B': [('2015_50', 0.5)],
 '0BFC35E2-4817-4865-BFA7-764742302A2D': [('2015_43', 1.5), ('2015_44', 1.5)],
 '0E6184E1-90C0-48EE-B25A-F1ECB7B9714E': [('2015_49', 4.0), ('2015_50', 4.0)],
 '1155FF54-63D3-4AB2-9863-8385D0BD0A13': [('2016_21', 5.0), ('2016_22', 5.0)],
 '11B5EC4D-4133-4289-B475-4E737182A406': [('2015_34', 1.5), ('2015_35', 1.5)],
 '136562B6-95B2-483D-88DC-065F28409FD2': [('2015_34', 4.5), ('2015_35', 4.5)],
 '1538C99F-BA1E-4EFB-A949-6C7C47701B20': [('2015_43', 1.0), ('2015_44', 1.0)],
 '1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842': [('2015_35', 2.5), ('2015_36', 2.5)],
 '24E40C4C-A349-4F9F-93AB-01D00FB994AF': [('2015_43', 3.5),
  ('2015_44', 3.5),
  ('2015_45', 3.5)],
 '27E04243-B138-4F40-A164-F40B60165CF3': [('2015_48', 5.0), ('2015_49', 5.0)],
 '2C32

In [82]:
productivity_user_inputs = None
for id in wellness_user_inputs.keys():
    prod_id_input = wellness_user_inputs[id].copy(deep=True)
    prod_id_input['datetime'] = pd.to_datetime(prod_id_input['timestamp'], unit='s')
    prod_id_input['week'] = prod_id_input['datetime'].apply(lambda x: x.strftime("%Y_%V"))
    prod_id_input['day'] = prod_id_input['datetime'].apply(lambda x: x.strftime("%Y-%m-%d"))
    prod_id_input['hourOfDay'] = prod_id_input['datetime'].apply(lambda x: x.strftime("%H"))
    prod_id_input = prod_id_input[['day','hourOfDay','context']]
    prod_id_input = prod_id_input.groupby(['day','hourOfDay'],as_index=False).agg({'context':[lambda x: (x=='OfficeWork').sum()/len(x), lambda x: ((x=='Amusement') | (x=='HavingMeal') | (x=='UsingBathroom')).sum()/len(x)]})
    prod_id_input.columns = ['day','hourOfDay','OfficeWork','Relaxing']
    prod_id_input['id'] = id
    prod_id_input = prod_id_input[['id','day','hourOfDay','OfficeWork','Relaxing']]
    if productivity_user_inputs is None:
        productivity_user_inputs = prod_id_input.copy(deep=True)
    else:
        productivity_user_inputs = pd.concat([productivity_user_inputs, prod_id_input], ignore_index=True)
    print(f"Done id {id}")

Done id 00EABED2-271D-49D8-B599-1D4A09240601
Done id 098A72A5-E3E5-4F54-A152-BBDA0DF7B694
Done id 0A986513-7828-4D53-AA1F-E02D6DF9561B
Done id 0BFC35E2-4817-4865-BFA7-764742302A2D
Done id 0E6184E1-90C0-48EE-B25A-F1ECB7B9714E
Done id 1155FF54-63D3-4AB2-9863-8385D0BD0A13
Done id 11B5EC4D-4133-4289-B475-4E737182A406
Done id 136562B6-95B2-483D-88DC-065F28409FD2
Done id 1538C99F-BA1E-4EFB-A949-6C7C47701B20
Done id 1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842
Done id 24E40C4C-A349-4F9F-93AB-01D00FB994AF
Done id 27E04243-B138-4F40-A164-F40B60165CF3
Done id 2C32C23E-E30C-498A-8DD2-0EFB9150A02E
Done id 33A85C34-CFE4-4732-9E73-0A7AC861B27A
Done id 3600D531-0C55-44A7-AE95-A7A38519464E
Done id 40E170A7-607B-4578-AF04-F021C3B0384A
Done id 481F4DD2-7689-43B9-A2AA-C8772227162B
Done id 4E98F91F-4654-42EF-B908-A3389443F2E7
Done id 4FC32141-E888-4BFF-8804-12559A491D8C
Done id 5119D0F8-FCA8-4184-A4EB-19421A40DE0D
Done id 5152A2DF-FAF3-4BA8-9CA9-E66B32671A53
Done id 59818CD2-24D7-4D32-B133-24C2FE3801E5
Done id 59

In [83]:
productivity_user_inputs.info()
productivity_user_inputs.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3757 entries, 0 to 3756
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          3757 non-null   object 
 1   day         3757 non-null   object 
 2   hourOfDay   3757 non-null   object 
 3   OfficeWork  3757 non-null   float64
 4   Relaxing    3757 non-null   float64
dtypes: float64(2), object(3)
memory usage: 146.9+ KB


Unnamed: 0,id,day,hourOfDay,OfficeWork,Relaxing
0,00EABED2-271D-49D8-B599-1D4A09240601,2015-10-05,21,0.483871,0.080645
1,00EABED2-271D-49D8-B599-1D4A09240601,2015-10-05,22,0.952381,0.0
2,00EABED2-271D-49D8-B599-1D4A09240601,2015-10-05,23,0.0,0.227723
3,00EABED2-271D-49D8-B599-1D4A09240601,2015-10-06,0,0.0,0.444444
4,00EABED2-271D-49D8-B599-1D4A09240601,2015-10-06,1,0.0,0.484305


In [84]:
productivity_user_inputs.to_csv("../../cache/productivity_input.csv",index=False)

In [78]:
productivity_user_inputs.Relaxing.sum()

0.0

Unnamed: 0,id,day,hourOfDay,OfficeWork,Relaxing
0,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-09-30,16,0.422222,0.0
1,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-09-30,17,0.009259,0.0
2,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-09-30,18,0.135135,0.0
3,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-09-30,19,0.300000,0.0
4,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-09-30,20,0.310811,0.0
...,...,...,...,...,...
56,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-10-07,03,0.000000,0.0
57,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-10-07,05,0.000000,0.0
58,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-10-07,06,0.000000,0.0
59,FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF,2015-10-07,15,0.000000,0.0
