### Paper title : 
# Smartwatch-based ecological momentary assessments for occupant wellness and privacy in buildings

### Authors : 
Clayton Miller, Renee Christensen, Jin Kai Leong, Mahmoud Abdelrahman, Federico Tartarini, Matias Quintana, Andre Matthias Muller, and Mario Frei

In [28]:
#imports

import json
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go

import schema.infection_schema as infection
import schema.fitness_schema as fitness
import schema.privacy_schema as privacy

In [12]:
# Relative folders used throughout the notebook
plots_folder = "./plots/"                  # prefix for the saved plot files
data_frames_folder = "./dataFrames/"       # prefix for the CSV files read with pandas
mapped_answers_folder = "./questionMap/"   # prefix for the JSON answer mappings

In [5]:
# Load the schema of each question flow (answer code -> answer label).
# Please refer to Figures 1, 2, and 3 in the paper.
def _load_mapped_answers(file_name):
    """Return the parsed JSON content of `file_name` inside `mapped_answers_folder`.

    The file is opened in a `with` block so the handle is closed as soon as
    the content has been parsed.
    """
    with open(mapped_answers_folder + file_name) as mapping_file:
        return json.load(mapping_file)


privacy_mapped_answers = _load_mapped_answers("privacy_mapped_answers.json")
infection_mapped_answers = _load_mapped_answers("infection_mapped_answers.json")
fitness_mapped_answers = _load_mapped_answers("fitness_mapped_answers.json")

### Figure 1 infection risk question flow
<img width="400px" src="./figures/Fig1-covid.jpg">

In [8]:
# Figure 1: display the infection-risk answer mapping (question -> answer code -> label)
infection_mapped_answers

{'surroundingsIncreaseInfectionRisk': {'11': 'Not at all',
  '10': 'A lot',
  '9': 'A little'},
 'WhatCausesMoreRisk': {'11': 'People', '10': 'Surface', '9': 'Ventilation'},
 'SpecificallyWhatConcernsYou': {'11': 'Density',
  '10': 'Proximity',
  '9': 'Both'},
 'HowManyPeopleIn5M': {'11': '0 pax', '10': '1-4 pax', '9': '5+ pax'}}

### Figure 2 fitness question flow
<img width="400px" src="./figures/Fig2-movement.jpg">

In [9]:
# Figure 2: display the fitness (movement) answer mapping (question -> answer code -> label)
fitness_mapped_answers

{'stairsElevators': {'11': 'Lift',
  '10': 'Stairs',
  '9': 'Both',
  '12': 'Neither'},
 'workingNow': {'11': 'No', '10': 'Yes'},
 'BothLiftAndStairs_WhyLift': {'11': 'Less effort',
  '10': 'No stairs',
  '9': 'Convenient'},
 'BothLiftAndStairs_WhyStairs': {'11': 'Healthy',
  '10': 'No lift',
  '9': 'Convenient',
  '12': 'Save Energy'},
 'whyLift': {'11': 'Less effort', '10': 'No stairs', '9': 'Convenient'},
 'whyStairs': {'11': 'Healthy',
  '10': 'No lift',
  '9': 'Convenient',
  '12': 'Save Energy'},
 'workStationType': {'11': 'Standing', '10': 'Sitting', '9': 'adjustable'},
 'adjustedWorkstationToday': {'11': 'down',
  '10': 'up',
  '9': 'up&down',
  '12': 'never'},
 'StairsConvenientBecuause': {'11': 'Easiest', '10': 'Fastest', '9': 'Both'},
 'BOTH_StairsConvenientBecuause': {'11': 'Easiest',
  '10': 'Fastest',
  '9': 'Both'},
 'LiftConvenientBecuause': {'11': 'Easiest', '10': 'Fastest', '9': 'Both'},
 'BOTH_LiftConvenientBecuause': {'11': 'Easiest',
  '10': 'Fastest',
  '9': 'Both

### Figure 3 privacy question flow
<img width="400px" src="./figures/Fig3-privacy.jpg">

In [7]:
# Figure 3: display the privacy answer mapping (question -> answer code -> label)
privacy_mapped_answers

{'aloneOrInAGroup': {'11': 'Online', '10': 'Group', '9': 'Alone'},
 'Alone_categoryOfActivity': {'11': 'Focus', '10': 'Leisure'},
 'Group_categoryOfActivity': {'11': 'Collaborate',
  '10': 'Learn',
  '9': 'Socialize'},
 'Group_PossiblyDistractingOthers': {'11': 'No', '10': 'Yes'},
 'Online_categoryOfActivity': {'11': 'Collaborate',
  '10': 'Learn',
  '9': 'Socialize'},
 'DistractionsNearby': {'11': 'A little', '10': 'A lot', '9': 'None'},
 'NeedMorePrivacy': {'11': 'No', '10': 'Yes'},
 'ALittle_WhatKindOfDistraction': {'11': 'Audio',
  '10': 'Others',
  '9': 'Visual'},
 'ALot_WhatKindOfDistraction': {'11': 'Audio', '10': 'Others', '9': 'Visual'},
 'WhatIsIt': {'11': 'Thermal', '10': 'Glare', '9': 'Scent'},
 'whyMorePrivacyNeeded': {'11': 'See me', '10': 'Both', '9': 'Hear me'},
 'WhatPeopleSee': {'11': 'Work', '10': 'Behavior', '9': 'Appearance'}}

### Loading the data

In [29]:
# 1. The movement (fitness) data frame
# The "time" column is used as the index; parse_dates=True asks pandas to
# parse that index as dates.
FitnessDfs = pd.read_csv(
    data_frames_folder + "fitness.csv",
    index_col="time",
    parse_dates=True,
)
FitnessDfs.head()

Unnamed: 0_level_0,stairsElevators,workingNow,BothLiftAndStairs_WhyLift,BothLiftAndStairs_WhyStairs,whyLift,whyStairs,workStationType,adjustedWorkstationToday,StairsConvenientBecuause,BOTH_StairsConvenientBecuause,...,areYou,heartRate,lat,lon,Accuracy,Floor,Latitude,Longitude,Space_id,Userid
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-08 17:17:20.705000+08:00,12,10.0,,,,,10.0,,,,...,10.0,66.0,1.29654,103.77033,8.25,3.0,1.296825,103.770497,2.0,onith27
2021-04-08 18:45:15.500000+08:00,10,11.0,,,,10.0,,,,,...,11.0,74.0,1.291827,103.780415,13.75,3.0,1.296842,103.770511,2.0,onith27
2021-04-09 12:49:58.060000+08:00,10,11.0,,,,9.0,,,9.0,,...,10.0,114.0,,,,,,,,
2021-04-09 14:55:58.459000+08:00,12,10.0,,,,,10.0,,,,...,10.0,70.0,,,4.25,3.0,1.296843,103.770476,2.0,onith27
2021-04-09 15:13:30.673000+08:00,12,11.0,,,,,,,,,...,10.0,72.0,,,6.25,6.0,1.297191,103.770651,,onith27


In [30]:
# 2. The privacy data frame
# The "time" column is used as the index; parse_dates=True asks pandas to
# parse that index as dates.
PrivacyDfs = pd.read_csv(
    data_frames_folder + "privacy.csv",
    index_col="time",
    parse_dates=True,
)
PrivacyDfs.head()

Unnamed: 0_level_0,aloneOrInAGroup,Alone_categoryOfActivity,Group_categoryOfActivity,Group_PossiblyDistractingOthers,Online_categoryOfActivity,DistractionsNearby,NeedMorePrivacy,ALittle_WhatKindOfDistraction,ALot_WhatKindOfDistraction,WhatIsIt,...,WhatPeopleSee,heartRate,lat,lon,Accuracy,Floor,Latitude,Longitude,Space_id,Userid
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-09 15:53:02.138000+08:00,9,11.0,,,,9.0,11.0,,,,...,,,1.296755,103.7704,2.25,3.0,1.296773,103.770469,2.0,onith03
2021-04-09 15:58:49.642000+08:00,9,11.0,,,,9.0,11.0,,,,...,,,1.296755,103.7704,2.75,3.0,1.296782,103.770472,2.0,onith03
2021-04-09 16:49:44.329000+08:00,9,11.0,,,,9.0,11.0,,,,...,,,,,2.75,3.0,1.296782,103.770472,2.0,onith03
2021-04-19 19:13:27.840000+08:00,9,10.0,,,,9.0,11.0,,,,...,,,1.29678,103.770355,2.25,3.0,1.296814,103.77049,2.0,onith03
2021-04-19 20:28:33.419000+08:00,9,11.0,,,,9.0,11.0,,,,...,,,1.296782,103.770362,5.5,3.0,1.296802,103.770483,2.0,onith03


In [31]:
# 3. The infection risk data frame
# The "time" column is used as the index; parse_dates=True asks pandas to
# parse that index as dates.
InfectionDfs = pd.read_csv(
    data_frames_folder + "infection_risk.csv",
    index_col="time",
    parse_dates=True,
)
InfectionDfs.head()

Unnamed: 0_level_0,surroundingsIncreaseInfectionRisk,WhatCausesMoreRisk,SpecificallyWhatConcernsYou,HowManyPeopleIn5M,heartRate,lat,lon,Accuracy,Floor,Latitude,Longitude,Space_id,Userid,q1->4,q2->4,RiskCuase
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2021-04-08 13:21:18.229000+08:00,9,11.0,9.0,9.0,90,1.296762,103.770403,2.5,3.0,1.2968,103.770472,2.0,onith24,911-9,11 - 9,People
2021-04-08 13:22:10.304000+08:00,11,,,10.0,89,1.296762,103.770403,2.5,3.0,1.2968,103.770472,2.0,onith24,notAtAll-10.0,people - 10,
2021-04-08 13:29:57.195000+08:00,9,11.0,11.0,10.0,99,1.297043,103.770318,2.0,5.0,1.297014,103.770258,56.0,onith24,911-10,11 - 10,People
2021-04-08 13:30:08.899000+08:00,9,10.0,,10.0,97,1.297043,103.770318,2.25,5.0,1.297028,103.770198,56.0,onith24,911-10,10 - 10,Surface
2021-04-08 13:31:13.203000+08:00,9,9.0,,9.0,100,1.297043,103.770318,4.75,5.0,1.296972,103.770355,56.0,onith24,911-9,9 - 9,Ventilation


### Figure 4: Plotting the sankey diagrams of each question flow 

<img width="600" src="./figures/Fig4-results.jpg">

In [15]:
# 1. Movement sankey diagram

# Each link is [source node index, target node index, value]; the indices
# refer to positions in the `label` list of the Sankey node definition below.
# This is the same [source, target, value] layout used by `privacy_data` and
# `infection_data`.
fitness_data = [
    # Stairs -> Convenient (#6) / Fitness / No lift / Save Energy
    [1, 6, 6],
    [1, 7, 12],
    [1, 8, 3],
    [1, 10, 1],
    # Lift -> Less effort / Convenient (#6)
    [0, 4, 6],
    [0, 6, 2],
    # Both -> Less effort / No stairs / Convenient (#6)
    [2, 4, 3],
    [2, 5, 3],
    [2, 6, 4],
    # Convenient (#6) -> Easier&Faster
    [6, 20, 3],
    # Stairs -> Convenient (#9) / No lift / Convenient (#6)
    # NOTE(review): the pairs 1->8 and 1->6 already appear in the first group,
    # and 6->20 appears both above and below. Confirm these repeats are
    # intended and not links whose source index should differ.
    [1, 9, 4],
    [1, 8, 3],
    [1, 6, 3],
    # Convenient (#6) -> Easier&Faster / Easiest / Fastest
    [6, 20, 2],
    [6, 18, 1],
    [6, 19, 1],
    # Working Now? -> Yes / No
    [26, 23, 36],
    [26, 24, 46],
    # Yes -> workstation type: Sitting (#12)
    [23, 12, 36],
    # workstation type: Sitting (#12) -> Standing / Sitting
    [12, 21, 14],
    [12, 22, 36],
]

data = [go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        # NOTE(review): 'Worksatation' is misspelled in the labels below; the
        # strings are left unchanged so the rendered diagram keeps its text.
        label=[
            'Lift',                             # 0
            'Stairs',                           # 1
            'Both',                             # 2
            'Neither',                          # 3
            'Less effort',                      # 4
            'No stairs',                        # 5
            'Convenient',                       # 6
            'Fitness',                          # 7
            'No lift',                          # 8
            'Convenient',                       # 9
            'Save Energy',                      # 10
            'Worksatation type :Standing',      # 11
            'Worksatation type :Sitting',       # 12
            'Worksatation type :adjustable',    # 13
            'Adjusted workstation down',        # 14
            'Adjusted workstation up',          # 15
            'Adjusted workstation up&down',     # 16
            'Adjusted workstation never',       # 17
            'Easiest',                          # 18
            'Fastest',                          # 19
            'Easier&Faster',                    # 20
            'Standing',                         # 21
            'Sitting',                          # 22
            'Yes',                              # 23
            'No',                               # 24
            'end',                              # 25
            "Working Now?",                     # 26
        ],
        color="blue",
    ),
    link=dict(
        source=[link[0] for link in fitness_data],
        target=[link[1] for link in fitness_data],
        value=[link[2] for link in fitness_data],
    ),
)]

# Make sure the output folder exists before plotly writes into it.
os.makedirs(plots_folder, exist_ok=True)
# The explicit ".html" suffix avoids plotly's "didn't end with .html" warning;
# the written file path is the same as before.
plotly.offline.plot(data, filename=plots_folder+'fitness.html', image='svg')



Your filename `./plots/fitness` didn't end with .html. Adding .html to the end of your file.



'./plots/fitness.html'

In [17]:
# 2. Privacy sankey diagram

# Links of the privacy question flow.
# Every entry is [source, target, number].
privacy_data = [
    # first question: three outgoing links from node 27
    [27,  0, 62],
    [27,  1,  3],
    [27,  2,  4],

    # Alone: type of activity
    [ 0,  3, 51],
    [ 0,  4, 11],

    # Group_categoryOfActivity: Socialize 1, Learn 1, Collaborate 1
    [ 1,  7,  1],
    [ 1,  6,  1],
    [ 1,  5,  1],

    # Group_PossiblyDistractingOthers: Yes 2, No 1
    [ 1,  8,  2],
    [ 1,  9,  1],

    # Online_categoryOfActivity: Learn 2
    [ 2,  6,  2],

    # DistractionsNearby: None 36, A little 26, A lot 5
    [28, 11, 36],
    [28, 12, 26],
    [28, 10,  5],

    # NeedMorePrivacy: No 61, Yes 8
    [11, 14, 36],
    [12, 14, 21],
    [12, 13,  5],
    [10, 13,  5],

    # ALittle_WhatKindOfDistraction: Audio 25, Visual 1
    [12, 15, 25],
    [12, 17,  1],

    # ALot_WhatKindOfDistraction: Audio 4, Visual 1
    [10, 15,  4],
    [10, 17,  1],

    # WhatIsIt: no links listed

    # whyMorePrivacyNeeded: See me 4, Both 3, Hear me 3
    [13, 21,  4],
    [13, 22,  3],
    [13, 23,  3],

    # WhatPeopleSee: Work 3, Appearance 2
    [21, 24,  3],
    [21, 26,  2],
]


# Build the privacy Sankey figure from `privacy_data`, whose entries are
# [source, target, number]; source and target are positions in `label`.
data = [go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=[
            "Alone",                    # 0
            "Group",                    # 1
            "Online",                   # 2

            "Focus",                    # 3
            "Leisure",                  # 4

            "Collaborate",              # 5
            "Learn",                    # 6
            "Socialize",                # 7

            "Distracting others? Yes",  # 8
            "Distracting others? No",   # 9

            "A lot",                    # 10
            "None",                     # 11
            "a little",                 # 12

            "Need more privacy? Yes",   # 13
            "Need more privacy? No",    # 14

            "Audio",                    # 15
            "Other distractions",       # 16
            "Visual",                   # 17

            "Thermal",                  # 18
            "Glare",                    # 19
            "Scent",                    # 20

            "People see me",            # 21
            "people hear me",           # 22
            "see and hear me",          # 23

            "Work",                     # 24
            "Behaviour",                # 25
            "Appearance",               # 26
            "Are you ? ",               # 27
            "Distractions nearby ?",    # 28
        ],
        color="blue",
    ),
    link=dict(
        source=[link[0] for link in privacy_data],
        target=[link[1] for link in privacy_data],
        value=[link[2] for link in privacy_data],
    ),
)]

# Make sure the output folder exists before plotly writes into it.
os.makedirs(plots_folder, exist_ok=True)
# The explicit ".html" suffix avoids plotly's "didn't end with .html" warning;
# the written file path is the same as before.
plotly.offline.plot(data, filename=plots_folder+'privacy.html', image='svg')



Your filename `./plots/privacy` didn't end with .html. Adding .html to the end of your file.



'./plots/privacy.html'

In [20]:
# 3. Infection risk sankey diagram

# Links of the infection-risk question flow.
# Every entry is [source, target, number].
infection_data = [
    # surroundingsIncreaseInfectionRisk: Not at all 53, A lot 38, A little 38
    [12,  0, 53],
    [12,  1, 76],

    # WhatCausesMoreRisk: Surface 40, Ventilation 27, People 9
    [ 1,  3, 40],
    [ 1,  4, 27],
    [ 1,  5,  9],

    # SpecificallyWhatConcernsYou: Density 4, Proximity 3, Both 2
    [ 5,  6,  4],
    [ 5,  7,  3],
    [ 5,  8,  2],

    # HowManyPeopleIn5M: 0 pax 64, 1-4 pax 41, 5+ pax 24
    [ 0,  9, 43],
    [ 0, 10,  8],
    [ 0, 11,  2],

    [ 5, 10,  6],
    [ 5, 11,  2],
    [ 5,  9,  1],
    [ 3,  9, 20],
    [ 3, 10, 16],
    [ 3, 11,  4],
    [ 4, 11, 16],
    [ 4, 10, 11],
]
# Build the infection-risk Sankey figure from `infection_data`, whose entries
# are [source, target, number]; source and target are positions in `label`.
data = [go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=[
            "Not at all",                     # 0
            "A little, A lot",                # 1
            "A lot",                          # 2
            "Surface",                        # 3
            "Ventilation",                    # 4
            "People",                         # 5
            "Density",                        # 6
            "Proximity",                      # 7
            "Both",                           # 8
            "0 pax",                          # 9
            "1-4 pax",                        # 10
            "5+ pax",                         # 11
            "Surroundings increase risk ? ",  # 12
            "How many people ",               # 13
        ],
        color="blue",
    ),
    link=dict(
        source=[link[0] for link in infection_data],
        target=[link[1] for link in infection_data],
        value=[link[2] for link in infection_data],
    ),
)]

# Make sure the output folder exists before plotly writes into it.
os.makedirs(plots_folder, exist_ok=True)
# The explicit ".html" suffix avoids plotly's "didn't end with .html" warning;
# the written file path is the same as before.
plotly.offline.plot(data, filename=plots_folder+'infection.html', image='svg')



Your filename `./plots/infection` didn't end with .html. Adding .html to the end of your file.



'./plots/infection.html'

### Figure 4 - spatial mapping of infection risk 
The following code draws a spatial scatter plot of the perceived cause of infection risk. It represents the answers to the question "What causes more risk?":

1. People, 
2. Surfaces, or 
3. Ventilation

<img width="300" src="./figures/risk_cause.png">

To plot the data correctly, a Mapbox access token (`mapbox_access_token`) is required; please refer to https://docs.mapbox.com/help/glossary/access-token/

In [24]:
# Keep only the rows recorded on floor 3.0 that have a non-missing "RiskCuase" value.
mapDf = InfectionDfs.loc[
    (InfectionDfs["Floor"] == 3.0) & InfectionDfs["RiskCuase"].notna()
]

In [None]:
# Spatial scatter plot of the rows in `mapDf`, coloured by "RiskCuase".
#
# A Mapbox public token is needed to render the map. It is read from the
# MAPBOX_ACCESS_TOKEN environment variable so that it does not have to be
# stored in the notebook; when the variable is not set the token stays empty.
# For more information about getting a Mapbox token:
# https://docs.mapbox.com/help/glossary/access-token/
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "")
px.set_mapbox_access_token(mapbox_access_token)

fig = px.scatter_mapbox(
    mapDf,
    lat="Latitude",
    lon="Longitude",
    opacity=0.5,
    color="RiskCuase",
    zoom=18,
    size="surroundingsIncreaseInfectionRisk",
    size_max=10,
)
fig.show()