### Paper title : 
# Smartwatch-based ecological momentary assessments for occupant wellness and privacy in buildings

### Authors : 
Clayton Miller, Renee Christensen, Jin Kai Leong, Mahmoud Abdelrahman, Federico Tartarini, Matias Quintana, Andre Matthias Muller, and Mario Frei

In [1]:
#imports

import pandas as pd


#plots
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly

#utils
import json
import copy
import os

#data
import schema.infection_schema as infection
import schema.fitness_schema as fitness
import schema.privacy_schema as privacy


In [2]:
# Folder locations used by the cells of this notebook (relative paths).
plots_folder = "./plots/"                  # exported plots are written here
data_frames_folder = "./dataFrames/"       # CSV inputs are read from here
mapped_answers_folder = "./questionMap/"   # JSON answer maps are read from here

In [3]:
# Load the answer schema of each question set. Please refer to Figures 1, 2, and 3 in the paper.
def _load_mapped_answers(file_name):
    """Parse one JSON file from ``mapped_answers_folder`` and return its content.

    The file is opened in a ``with`` block so its handle is closed as soon as
    parsing finishes, instead of being left open until garbage collection.
    """
    with open(mapped_answers_folder + file_name) as answers_file:
        return json.load(answers_file)


privacy_mapped_answers = _load_mapped_answers("privacy_mapped_answers.json")
infection_mapped_answers = _load_mapped_answers("infection_mapped_answers.json")
fitness_mapped_answers = _load_mapped_answers("fitness_mapped_answers.json")

### Figure 1 infection risk question flow
<img width="400px" src="./figures/Fig1-covid.jpg">

In [4]:
# Figure 1: display the infection answer map (question name -> {answer code: answer text})
infection_mapped_answers

{'surroundingsIncreaseInfectionRisk': {'11': 'Not at all',
  '10': 'A lot',
  '9': 'A little'},
 'WhatCausesMoreRisk': {'11': 'People', '10': 'Surface', '9': 'Ventilation'},
 'SpecificallyWhatConcernsYou': {'11': 'Density',
  '10': 'Proximity',
  '9': 'Both'},
 'HowManyPeopleIn5M': {'11': '0 pax', '10': '1-4 pax', '9': '5+ pax'}}

In [5]:
# Derive lookup tables from the infection question schema:
#   infection_columns        question names, in schema order
#   infection_labels         question name -> its schema position, as a string
#   next_question_options    question name -> {answer code: name of the next question}
#   questionOptions          question name -> {11/10/9: icon text}, for 2- or 3-option questions
#   infection_source_target  unique [question position, label of the next question] links
infection_columns = [question["name"] for question in infection.infection]
infection_labels = {name: str(position) for position, name in enumerate(infection_columns)}
next_question_options = {
    question["name"]: {code: route["next"] for code, route in question["answerDirectTo"].items()}
    for question in infection.infection
}

# Icon texts are keyed by the codes 11, 10, 9 in that order; a question whose
# option count is neither 2 nor 3 gets no entry.
questionOptions = {}
for question in infection.infection:
    icon_texts = question["iconText"]
    if len(icon_texts) in (2, 3):
        codes = (11, 10, 9)[:len(icon_texts)]
        questionOptions[question["name"]] = {
            code: icon_texts[slot] for slot, code in enumerate(codes)
        }

# A route whose "next" is "end" closes the flow and produces no link.
infection_source_target = []
for position, question in enumerate(infection.infection):
    for route in question["answerDirectTo"].values():
        if route["next"] == "end":
            continue
        link = [position, infection_labels[route["next"]]]
        if link not in infection_source_target:
            infection_source_target.append(link)

next_question_options

{'surroundingsIncreaseInfectionRisk': {'11': 'HowManyPeopleIn5M',
  '10': 'WhatCausesMoreRisk',
  '9': 'WhatCausesMoreRisk'},
 'WhatCausesMoreRisk': {'11': 'SpecificallyWhatConcernsYou',
  '10': 'HowManyPeopleIn5M',
  '9': 'HowManyPeopleIn5M'},
 'SpecificallyWhatConcernsYou': {'11': 'HowManyPeopleIn5M',
  '10': 'HowManyPeopleIn5M',
  '9': 'HowManyPeopleIn5M'},
 'HowManyPeopleIn5M': {'11': 'end', '10': 'end', '9': 'end'}}

### Loading the data

In [7]:
# 2. The infection data frame: read the CSV with "time" as a parsed date index
#    and leave out every row whose "Userid" is "onith35".
infectionDfs = pd.read_csv(
    data_frames_folder + "infection.csv",
    parse_dates=True,
    index_col="time",
).loc[lambda responses: responses["Userid"] != "onith35"]

# Independent copy restricted to the question columns, so later writes to
# MappedinfectionDf never reach infectionDfs.
MappedinfectionDf = infectionDfs[infection_columns].copy()

def intIt(x):
    """Return ``x`` as an integer string (e.g. ``11.0`` -> ``"11"``), or -10.

    The integer -10 is the sentinel returned for values that have no integer
    form, such as ``None``, NaN, infinities, or non-numeric text.
    """
    try:
        return str(int(x))
    except (TypeError, ValueError, OverflowError):
        # Catch conversion failures only; a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return -10

# Turn every cell into an integer code string; cells intIt cannot convert
# become -10. A failure is printed per column and the other columns still run.
for column in MappedinfectionDf.columns:
    try:
        MappedinfectionDf[column] = MappedinfectionDf[column].apply(intIt)
    except Exception as error:
        print(str(error))

def mapIt(x, columnName):
    """Translate answer code ``x`` of question ``columnName`` into its text.

    The sentinel -10 yields the string "none"; any other value is looked up in
    ``infection_mapped_answers[columnName]`` under its integer-string form.
    """
    if x == -10:
        return "none"
    answer_code = str(int(x))
    return infection_mapped_answers[columnName][answer_code]
# Swap each code for its answer text via mapIt (-10 cells become "none").
# A column whose mapping raises is reported and keeps its previous values.
for column in MappedinfectionDf.columns:
    try:
        MappedinfectionDf[column] = MappedinfectionDf[column].apply(mapIt, args=(column,))
    except Exception as error:
        print(str(error))

MappedinfectionDf.head()

Unnamed: 0_level_0,surroundingsIncreaseInfectionRisk,WhatCausesMoreRisk,SpecificallyWhatConcernsYou,HowManyPeopleIn5M
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-04-08 13:21:18.229000+08:00,A little,People,Both,5+ pax
2021-04-08 13:22:10.304000+08:00,Not at all,none,none,1-4 pax
2021-04-08 13:29:57.195000+08:00,A little,People,Density,1-4 pax
2021-04-08 13:30:08.899000+08:00,A little,Surface,none,1-4 pax
2021-04-08 13:31:13.203000+08:00,A little,Ventilation,none,5+ pax


In [8]:
# Every distinct answer text, in order of first appearance across the questions.
infection_all_answers = []
for answers_by_code in infection_mapped_answers.values():
    for answer_text in answers_by_code.values():
        if answer_text in infection_all_answers:
            continue
        infection_all_answers.append(answer_text)

infection_pairs = []
def getChild(src, item):
    """Append a ``[src, answer]`` pair to ``infection_pairs`` for each answer of ``item``.

    ``item`` is a question name used as a key of ``infection_mapped_answers``;
    the value "end" adds nothing.
    """
    if item == "end":
        return
    infection_pairs.extend(
        [src, answer_text] for answer_text in infection_mapped_answers[item].values()
    )

# For every answer of every question, pair the answer's text with each answer
# text of the question that this answer leads to.
for question_name, routes in next_question_options.items():
    for answer_code, next_question in routes.items():
        answer_text = infection_mapped_answers[question_name][answer_code]
        getChild(answer_text, next_question)

In [9]:
# Display the answer list; an answer's position in it is the index used by later cells.
infection_all_answers

['Not at all',
 'A lot',
 'A little',
 'People',
 'Surface',
 'Ventilation',
 'Density',
 'Proximity',
 'Both',
 '0 pax',
 '1-4 pax',
 '5+ pax']

In [10]:
# Express every answer pair as [source position, target position] within
# infection_all_answers.
infection_pairs_indeces = [
    [infection_all_answers.index(source_answer), infection_all_answers.index(target_answer)]
    for source_answer, target_answer in infection_pairs
]


In [11]:
# Numeric twin of MappedinfectionDf: every answer text is replaced by its
# position in infection_all_answers.
MappedinfectionDfNumbered = MappedinfectionDf.copy()


def indexIt(x):
    """Return the position of ``x`` in ``infection_all_answers``, or -10 if absent."""
    try:
        return infection_all_answers.index(x)
    except ValueError:
        # list.index raises ValueError for a value that is not in the list
        # (e.g. the "none" placeholder). Any other exception is a real error
        # and must surface instead of being hidden by a bare ``except``.
        return -10


for column in MappedinfectionDfNumbered:
    MappedinfectionDfNumbered[column] = MappedinfectionDfNumbered[column].apply(indexIt)
MappedinfectionDfNumbered.head()

Unnamed: 0_level_0,surroundingsIncreaseInfectionRisk,WhatCausesMoreRisk,SpecificallyWhatConcernsYou,HowManyPeopleIn5M
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-04-08 13:21:18.229000+08:00,2,3,8,11
2021-04-08 13:22:10.304000+08:00,0,-10,-10,10
2021-04-08 13:29:57.195000+08:00,2,3,6,10
2021-04-08 13:30:08.899000+08:00,2,4,-10,10
2021-04-08 13:31:13.203000+08:00,2,5,-10,11


In [12]:

# For every response row, record each known [source, target] answer pair whose
# two answers both occur in that row. A pair is stored in its string form,
# e.g. "[0, 9]".
sourceTarget = []
# Walk the rows as plain tuples rather than via transpose().to_dict(): a dict
# keyed by the timestamp index keeps only one row per duplicated timestamp, so
# responses sharing a timestamp would silently go uncounted.
for answers in MappedinfectionDfNumbered.itertuples(index=False, name=None):
    answered = set(answers)
    for pair in infection_pairs_indeces:
        if pair[0] in answered and pair[1] in answered:
            sourceTarget.append(str(pair))

sourceTargetDf = pd.DataFrame({"sourceTarget": sourceTarget})


In [13]:
# Tally how often every "[source, target]" string occurs. The counts are read
# straight from the Series: when value_counts() is wrapped in a DataFrame, the
# column is named "count" on pandas >= 2.0, so a "sourceTarget" lookup on that
# frame's dict raises KeyError there.
pair_counts = sourceTargetDf["sourceTarget"].value_counts().to_dict()
infection_source_target_count = {"sourceTarget": pair_counts}  # {column name: {pair: count}}

# Sankey links as [source, target, count]; value_counts() sorts by descending count.
infection_data = []
for pair_text, count in pair_counts.items():
    source, target = json.loads(pair_text)
    infection_data.append([source, target, count])
infection_data

[[0, 9, 20],
 [2, 5, 18],
 [5, 11, 14],
 [2, 4, 9],
 [4, 10, 6],
 [2, 3, 4],
 [5, 10, 4],
 [3, 6, 3],
 [6, 10, 2],
 [0, 10, 2],
 [4, 11, 2],
 [3, 8, 1],
 [8, 11, 1],
 [6, 9, 1],
 [4, 9, 1]]

In [14]:
# Print the full text of every question. Each question is stored in two parts
# ("questionText" and "questionSecondText"); join the non-empty parts with a
# space so the last word of the first part does not fuse with the second part.
for question in infection.infection:
    text_parts = (question["questionText"], question["questionSecondText"])
    print(" ".join(part.strip() for part in text_parts if part.strip()))

Do your surroundingsincrease infection risk?
What causes more risk?
Specifically, whatconcerns you?
Currently, how manypeople within 5m?


In [16]:
# 2. infection sankey diagram
# Nodes are the answer texts; their list position is the node index.
sankey_nodes = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": infection_all_answers,
    "color": "blue",
}
# Every infection_data entry is read as [source index, target index, value].
sankey_links = {
    "source": [entry[0] for entry in infection_data],
    "target": [entry[1] for entry in infection_data],
    "value": [entry[2] for entry in infection_data],
}
data = [go.Sankey(node=sankey_nodes, link=sankey_links)]

plotly.offline.plot(data, filename=plots_folder + 'infection', image='svg')


'./plots/infection.html'

In [9]:
# 3. Infection risk sankey diagram
# Hand-entered links, one [source label index, target label index, value] per
# row; the indices refer to positions in `label` below.
infection_data = [
    # surroundingsIncreaseInfectionRisk: Not at all 53, A lot 38, A little 38
    [12, 0, 53],
    [12, 1, 76],

    # WhatCausesMoreRisk: Surface 40, Ventilation 27, People 9
    [1, 3, 40],
    [1, 4, 27],
    [1, 5, 9],

    # SpecificallyWhatConcernsYou: Density 4, Proximity 3, Both 2
    [5, 6, 4],
    [5, 7, 3],
    [5, 8, 2],

    # HowManyPeopleIn5M: 0 pax 64, 1-4 pax 41, 5+ pax 24
    [0, 9, 43],
    [0, 10, 8],
    [0, 11, 2],
    [5, 10, 6],
    [5, 11, 2],
    [5, 9, 1],
    [3, 9, 20],
    [3, 10, 16],
    [3, 11, 4],
    [4, 11, 16],
    [4, 10, 11],
]

# Node captions; the position of a caption in this list is its node index.
label = [
    "Not at all",                     # 0
    "A little, A lot",                # 1
    "A lot",                          # 2
    "Surface",                        # 3
    "Ventilation",                    # 4
    "People",                         # 5
    "Density",                        # 6
    "Proximity",                      # 7
    "Both",                           # 8
    "0 pax",                          # 9
    "1-4 pax",                        # 10
    "5+ pax",                         # 11
    "Surroundings increase risk ? ",  # 12
    "How many people ",               # 13
]

# Node index -> caption.
labels = dict(enumerate(label))
labels

{0: 'Not at all',
 1: 'A little, A lot',
 2: 'A lot',
 3: 'Surface',
 4: 'Ventilation',
 5: 'People',
 6: 'Density',
 7: 'Proximity',
 8: 'Both',
 9: '0 pax',
 10: '1-4 pax',
 11: '5+ pax',
 12: 'Surroundings increase risk ? ',
 13: 'How many people '}

### Figure 4 - spatial mapping of infection risk 
The following code draws a spatial scatter plot of the perceived cause of infection risk. It represents the answers to the question "What causes more risk?":

1. People, 
2. Surfaces, or 
3. Ventilation

<img width="300" src="./figures/risk_cause.png">

To plot the data correctly, a Mapbox access token (`mapbox_access_token`) is required; please refer to https://docs.mapbox.com/help/glossary/access-token/

In [24]:
# Keep the rows whose "Floor" equals 3.0 and whose "RiskCuase" value is not missing.
# NOTE(review): `InfectionDfs` (capital I) and the "RiskCuase" column are not
# created in any visible cell -- the loading cell defines `infectionDfs`.
# Confirm where they come from; otherwise this cell fails on a fresh kernel.
mapDf = InfectionDfs[InfectionDfs["Floor"]==3.0]
mapDf = mapDf[mapDf["RiskCuase"].notna()]

In [None]:
import plotly.express as px

# A Mapbox public token is needed to render the map. It is read from the
# MAPBOX_ACCESS_TOKEN environment variable so the secret never has to be typed
# into (and saved with) the notebook; when the variable is unset the token is
# the empty string.
# How to get a token: https://docs.mapbox.com/help/glossary/access-token/
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "")
px.set_mapbox_access_token(mapbox_access_token)

# One marker per row of mapDf: colour follows "RiskCuase", marker size follows
# "surroundingsIncreaseInfectionRisk".
fig = px.scatter_mapbox(
    mapDf,
    lat="Latitude",
    lon="Longitude",
    opacity=0.5,
    color="RiskCuase",
    zoom=18,
    size="surroundingsIncreaseInfectionRisk",
    size_max=10,
)
fig.show()