### Paper title : 
# Smartwatch-based ecological momentary assessments for occupant wellness and privacy in buildings

### Authors : 
Clayton Miller , Renee Christensen, Jin Kai Leong, Mahmoud Abdelrahman, Federico Tartarini, Matias Quintana, Andre Matthias Muller, and Mario Frei

In [2]:
#imports

import pandas as pd

#plots
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly

#utils
import json
import copy
import os

#data
import schema.infection_schema as infection
import schema.fitness_schema as fitness
import schema.privacy_schema as privacy

In [3]:
# define the directories
plots_folder = "./plots/"
data_frames_folder ="./dataFrames/"
mapped_answers_folder ="./questionMap/"

In [6]:
# load the schema of each question. please refer to Figures 1, 2, and 3 in the paper.
# Please make sure that every answer is unique ... 
# for example if there are two questions with Yes or No answers, make sure to make them unique for example Yes1, and Yes2. 
privacy_mapped_answers = json.loads(open(mapped_answers_folder+"privacy_mapped_answers.json").read())
infection_mapped_answers = json.loads(open(mapped_answers_folder+"infection_mapped_answers.json").read())
fitness_mapped_answers = json.loads(open(mapped_answers_folder+"fitness_mapped_answers.json").read())

### Figure 1 infection risk question flow
<img width="400px" src="./figures/Fig1-covid.jpg">

In [5]:
# Figure 1
infection_mapped_answers

{'surroundingsIncreaseInfectionRisk': {'11': 'Not at all',
  '10': 'A lot',
  '9': 'A little'},
 'WhatCausesMoreRisk': {'11': 'People', '10': 'Surface', '9': 'Ventilation'},
 'SpecificallyWhatConcernsYou': {'11': 'Density',
  '10': 'Proximity',
  '9': 'Both'},
 'HowManyPeopleIn5M': {'11': '0 pax', '10': '1-4 pax', '9': '5+ pax'}}

In [7]:
# Figure 3
privacy_mapped_answers

{'aloneOrInAGroup': {'11': 'Online', '10': 'Group', '9': 'Alone'},
 'Alone_categoryOfActivity': {'11': 'Focus', '10': 'Leisure'},
 'Group_categoryOfActivity': {'11': 'Collaborate',
  '10': 'Learn',
  '9': 'Socialize'},
 'Group_PossiblyDistractingOthers': {'11': 'No1', '10': 'Yes1'},
 'Online_categoryOfActivity': {'11': 'Collaborate',
  '10': 'Learn',
  '9': 'Socialize'},
 'DistractionsNearby': {'11': 'A little', '10': 'A lot', '9': 'None'},
 'NeedMorePrivacy': {'11': 'No2', '10': 'Yes2'},
 'ALittle_WhatKindOfDistraction': {'11': 'Audio',
  '10': 'Others1',
  '9': 'Visual'},
 'ALot_WhatKindOfDistraction': {'11': 'Audio', '10': 'Others1', '9': 'Visual'},
 'WhatIsIt': {'11': 'Thermal', '10': 'Glare', '9': 'Scent'},
 'whyMorePrivacyNeeded': {'11': 'See me', '10': 'Both', '9': 'Hear me'},
 'WhatPeopleSee': {'11': 'Work', '10': 'Behavior', '9': 'Appearance'}}

<img width="400px" src="./figures/Fig3-privacy.jpg">

# Privacy flow pre-processing

In [10]:
# Privacy flow pre-processing

privacy_labels = {}
privacy_source_target=[]
privacy_columns = []
questionOptions ={}
next_question_options ={}

for index, i in enumerate(privacy.privacy):
    privacy_columns.append(i["name"])
    privacy_labels[i["name"]] = str(int(index))
    next_question_options[i["name"]]={}
    for k , v in i["answerDirectTo"].items():
        next_question_options[i["name"]][k] = v["next"]
    # check if the question has 2 or 3 answers. 
    if (len(i["iconText"]) ==3):
         questionOptions[i["name"]]= {11 : i["iconText"][0], 10 : i["iconText"][1], 9 : i["iconText"][2]} 

    elif (len(i["iconText"]) ==2):
         questionOptions[i["name"]]= {11 : i["iconText"][0], 10 : i["iconText"][1]} 

for index, i in enumerate(privacy.privacy):
    for k, v in i["answerDirectTo"].items():
        if v["next"] != "end":
            if not [index, privacy_labels[v["next"]]] in privacy_source_target:
                privacy_source_target.append([index, privacy_labels[v["next"]]])

next_question_options

{'aloneOrInAGroup': {'11': 'Online_categoryOfActivity',
  '10': 'Group_categoryOfActivity',
  '9': 'Alone_categoryOfActivity'},
 'Alone_categoryOfActivity': {'11': 'DistractionsNearby',
  '10': 'DistractionsNearby'},
 'Group_categoryOfActivity': {'11': 'Group_PossiblyDistractingOthers',
  '10': 'Group_PossiblyDistractingOthers',
  '9': 'Group_PossiblyDistractingOthers'},
 'Group_PossiblyDistractingOthers': {'11': 'DistractionsNearby',
  '10': 'DistractionsNearby'},
 'Online_categoryOfActivity': {'11': 'DistractionsNearby',
  '10': 'DistractionsNearby',
  '9': 'DistractionsNearby'},
 'DistractionsNearby': {'11': 'ALittle_WhatKindOfDistraction',
  '10': 'ALot_WhatKindOfDistraction',
  '9': 'NeedMorePrivacy'},
 'NeedMorePrivacy': {'11': 'end', '10': 'whyMorePrivacyNeeded'},
 'ALittle_WhatKindOfDistraction': {'11': 'NeedMorePrivacy',
  '10': 'WhatIsIt',
  '9': 'NeedMorePrivacy'},
 'ALot_WhatKindOfDistraction': {'11': 'NeedMorePrivacy',
  '10': 'WhatIsIt',
  '9': 'NeedMorePrivacy'},
 'WhatI

### Loading the data

In [11]:
# 2. The privacy data frame
PrivacyDfs= pd.read_csv(data_frames_folder+"privacy.csv", parse_dates=True, index_col="time")
PrivacyDfs = PrivacyDfs[PrivacyDfs["Userid"]!= "onith35"]

#copy the dataframe 
MappedPrivacyDf = copy.deepcopy(PrivacyDfs)[privacy_columns]

#convert all answers into stringified integer
def intIt(x):
    try:
        return str(int(x))
    except:
        return -10

for i in MappedPrivacyDf.columns:
    try:
        MappedPrivacyDf[i]=MappedPrivacyDf[i].apply(lambda x : intIt(x))
    except Exception as e:
        print(str(e))

#lambda function to map each cell to its string answer
def mapIt(x, columnName):
    if x != -10:
        return privacy_mapped_answers[columnName][str(int(x))]
    else:
        return "none"


for i in MappedPrivacyDf.columns:
    try:
        MappedPrivacyDf[i]=MappedPrivacyDf[i].apply(lambda x :mapIt(x, i))
    except Exception as e:
        print(str(e))

MappedPrivacyDf.head()

Unnamed: 0_level_0,aloneOrInAGroup,Alone_categoryOfActivity,Group_categoryOfActivity,Group_PossiblyDistractingOthers,Online_categoryOfActivity,DistractionsNearby,NeedMorePrivacy,ALittle_WhatKindOfDistraction,ALot_WhatKindOfDistraction,WhatIsIt,whyMorePrivacyNeeded,WhatPeopleSee
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-09 15:53:02.138000+08:00,Alone,Focus,none,none,none,,No2,none,none,none,none,none
2021-04-09 15:58:49.642000+08:00,Alone,Focus,none,none,none,,No2,none,none,none,none,none
2021-04-09 16:49:44.329000+08:00,Alone,Focus,none,none,none,,No2,none,none,none,none,none
2021-04-19 19:13:27.840000+08:00,Alone,Leisure,none,none,none,,No2,none,none,none,none,none
2021-04-19 20:28:33.419000+08:00,Alone,Focus,none,none,none,,No2,none,none,none,none,none


In [12]:
#list of all answers
privacy_all_answers= []
for k, v in privacy_mapped_answers.items():
    for j, v2 in v.items():
        if not v2 in privacy_all_answers:
            privacy_all_answers.append(v2)

#pair of answers for example ["Group", "socialize"]
privacy_pairs = []
def getChild(src, item):
    if item != "end":
        for k2, v2 in privacy_mapped_answers[item].items():
            privacy_pairs.append([src, v2])


for k ,v  in next_question_options.items():
    for k2, v2 in v.items():
        getChild(privacy_mapped_answers[k][k2], v2)

In [13]:
privacy_all_answers

['Online',
 'Group',
 'Alone',
 'Focus',
 'Leisure',
 'Collaborate',
 'Learn',
 'Socialize',
 'No1',
 'Yes1',
 'A little',
 'A lot',
 'None',
 'No2',
 'Yes2',
 'Audio',
 'Others1',
 'Visual',
 'Thermal',
 'Glare',
 'Scent',
 'See me',
 'Both',
 'Hear me',
 'Work',
 'Behavior',
 'Appearance']

In [15]:
privacy_pairs_indeces = [[privacy_all_answers.index(i[0]), privacy_all_answers.index(i[1])] for i in privacy_pairs]
# privacy_pairs_indeces


In [16]:
MappedPrivacyDfNumbered= copy.deepcopy(MappedPrivacyDf)
def indexIt(x):
    try:
        return privacy_all_answers.index(x)
    except:
        return -10

for i in MappedPrivacyDfNumbered:
    MappedPrivacyDfNumbered[i] = MappedPrivacyDfNumbered[i].apply(lambda x : indexIt(x))
MappedPrivacyDfNumbered.head() 

Unnamed: 0_level_0,aloneOrInAGroup,Alone_categoryOfActivity,Group_categoryOfActivity,Group_PossiblyDistractingOthers,Online_categoryOfActivity,DistractionsNearby,NeedMorePrivacy,ALittle_WhatKindOfDistraction,ALot_WhatKindOfDistraction,WhatIsIt,whyMorePrivacyNeeded,WhatPeopleSee
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-09 15:53:02.138000+08:00,2,3,-10,-10,-10,12,13,-10,-10,-10,-10,-10
2021-04-09 15:58:49.642000+08:00,2,3,-10,-10,-10,12,13,-10,-10,-10,-10,-10
2021-04-09 16:49:44.329000+08:00,2,3,-10,-10,-10,12,13,-10,-10,-10,-10,-10
2021-04-19 19:13:27.840000+08:00,2,4,-10,-10,-10,12,13,-10,-10,-10,-10,-10
2021-04-19 20:28:33.419000+08:00,2,3,-10,-10,-10,12,13,-10,-10,-10,-10,-10


In [17]:

sourceTargetDf = pd.DataFrame({"sourceTarget": []})
sourceTargetDf

sourceTarget = []
for k , v in MappedPrivacyDfNumbered.transpose().to_dict().items():
    row = [v2 for k2 , v2 in v.items()]
    for i in privacy_pairs_indeces:
        if i[0] in row and i[1] in row:
            if [i[0], i[1]] in privacy_pairs_indeces:
                sourceTarget.append(str(i))

sourceTargetDf["sourceTarget"] = sourceTarget


In [18]:
privacy_source_target_count = pd.DataFrame(sourceTargetDf["sourceTarget"].value_counts()).to_dict()

privacy_data = []
for k, v in privacy_source_target_count["sourceTarget"].items():
    d = json.loads(k)
    d.append(v)
    privacy_data.append(d)
privacy_data

[[2, 3, 34],
 [12, 13, 29],
 [15, 13, 28],
 [3, 12, 19],
 [10, 15, 14],
 [3, 10, 13],
 [4, 12, 8],
 [2, 4, 8],
 [15, 14, 4],
 [11, 15, 2],
 [14, 23, 2],
 [6, 12, 2],
 [3, 11, 2],
 [1, 5, 1],
 [0, 6, 1],
 [1, 6, 1],
 [9, 10, 1],
 [8, 12, 1],
 [5, 10, 1],
 [6, 8, 1],
 [5, 9, 1]]

In [19]:
#questions to answers 
for i in privacy.privacy:
    print(i["questionText"] + i["questionSecondText"] )

Alone or in a group?
Category of Activity?
Category of Activity?
Possible distracting?others?
Category of Activity?
Distractions nearby?
Feeling like you needmore privacy?
What kind of distraction?
What kind of distraction?
What is it?
Why more privacyneeded?
What do people see?


In [20]:
# 2. Privacy sankey diagram
data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = privacy_all_answers,
      color = "blue"
    ),
   link = dict(
      source = [x[0] for x in privacy_data], 
      target = [x[1] for x in privacy_data],
      value =  [x[2] for x in privacy_data]
  ))]

plotly.offline.plot(data, filename=plots_folder+'privacy', image='svg')



Your filename `./plots/privacy` didn't end with .html. Adding .html to the end of your file.



'./plots/privacy.html'

### Figure 4 - spatial mapping of infection risk 
The following code is a spatial scatter plot of the infection cause preception. It represents answers to the question "What causes more risk?" 

1. People, 
2. Surfaces, or 
3. Ventilation

<img width="300" src="./figures/risk_cause.png">

to plot the data correctly, mapbox_access_token is required, please refer to https://docs.mapbox.com/help/glossary/access-token/

In [24]:
mapDf = InfectionDfs[InfectionDfs["Floor"]==3.0]
mapDf = mapDf[mapDf["RiskCuase"].notna()]

In [None]:
import plotly.express as px
mapbox_access_token = ""   # you need to get a mapbox public token to be able to render the map
                           # for more information about getting mapbox token : https://docs.mapbox.com/help/glossary/access-token/
px.set_mapbox_access_token(mapbox_access_token)
# fig = px.density_mapbox(newDf, lat="Latitude", lon="Longitude",opacity=0.8, z="surroundingsIncreaseInfectionRisk",zoom=18,radius=30)
fig = px.scatter_mapbox(mapDf, lat="Latitude", lon="Longitude",opacity=0.5, color="RiskCuase",zoom=18, size="surroundingsIncreaseInfectionRisk",size_max=10)
fig.show()