In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser
import requests
import json
from ipywidgets import widgets 

# Use plotly as the pandas plotting backend for the DataFrame/Series .plot() calls in this notebook.
pd.options.plotting.backend = "plotly"

%matplotlib inline
%run global.ipynb

# Data setup

## Fetch data

In [3]:
from ipywidgets import widgets
# Dropdown used to pick which dataset the loading cell reads: "Day" or "Week".
# NOTE(review): `mode` is captured once here and afterwards only updated by the
# on_change callback, so the cells below depend on interactive widget state and
# must be re-run after the selection changes.
options = ["Day", "Week"]
dropdown = widgets.Dropdown(options=options) 
mode = dropdown.value
def on_change(change):
    """Widget observer that mirrors the dropdown selection into the global `mode`.

    `change` is a mapping with at least a 'type' key; when 'type' is 'change'
    it must also carry 'name', and 'new' when 'name' is 'value'. Any
    notification that is not a change of the 'value' trait is ignored.
    """
    global mode
    # Guard clause: skip every event except an actual change of 'value'.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    mode = change['new']
    
# Register the callback; on_change itself ignores everything except changes to 'value'.
dropdown.observe(on_change)
# Render the dropdown as this cell's output.
display(dropdown)

Dropdown(options=('Day', 'Week'), value='Day')

In [3]:
import os

# Load the raw records for the selected mode into `data`.
# 'Day' reads a single file; any other mode reads the weekly files.
# The files are parsed as JSON, which is UTF-8 by specification, and the data
# contains non-ASCII text, so the encoding is given explicitly instead of
# relying on the platform's locale default.
if mode == 'Day':
    with open(os.path.join(os.getcwd(), 'data/dataset-2-2-day-2023-03-07'), 'r', encoding='utf-8') as f:
        data = json.load(f)
else:
    # The weekly data is spread over several files; concatenate them in order.
    data = []
    datasets = ['data/dataset-2-2-week-2023-03-07-1', 'data/dataset-2-2-week-2023-03-07-2', 'data/dataset-2-2-week-2023-03-07-3']
    for dataset in datasets:
        with open(os.path.join(os.getcwd(), dataset), 'r', encoding='utf-8') as f:
            tmp_data = json.load(f)
        # The file is already closed here; only the parsed records are kept.
        data.extend(tmp_data)

In [4]:
%run global.ipynb

In [5]:
# Flatten the nested records in `data` into two tables:
#   users_df - one row per record that has a usable compareDate
#   df       - one row per entry of that record's 'rows' list, with the
#              record-level fields repeated on every line
user_columns = ['id', 'gender', 'ageRange', 'occupation', 'compareDate', 'stepsEstimate']
hourly_columns = ['series', 'hour', 'steps']

rows = []
user_list = []
for row in data:
    compare_date = row['compareDate']
    # Records with a missing or unparseable compareDate are dropped entirely.
    if compare_date is not None and compare_date != 'Invalid date':
        user = [row['id'], row['gender'], row['ageRange'], row['occupation'], parser.parse(compare_date), row['stepsEstimate']]
        user_list.append(user)
        # Each entry's 'value' ends up in the 'steps' column.
        rows.extend(user + [hour['series'], hour['hour'], hour['value']] for hour in row['rows'])

users_df = pd.DataFrame(user_list, columns=user_columns)
df = pd.DataFrame(rows, columns=user_columns + hourly_columns)

In [6]:
df

Unnamed: 0,id,gender,ageRange,occupation,compareDate,stepsEstimate,series,hour,steps
0,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,10,74.400000
1,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,11,113.466667
2,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,12,267.066667
3,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,13,178.866667
4,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,14,323.000000
...,...,...,...,...,...,...,...,...,...
694051,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,05,10.445672
694052,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,06,364.363903
694053,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,07,979.696745
694054,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,08,424.530925


In [7]:
df.describe()

Unnamed: 0,stepsEstimate,steps
count,681552.0,694056.0
mean,-0.123252,296.931812
std,0.354606,406.192781
min,-1.0,0.0
25%,-0.35,12.258065
50%,-0.1,164.662601
75%,0.05,438.259186
max,2.0,36653.066667


In [8]:
# Boolean flag: True where ageRange is one of the brackets listed in work_ages
# (presumably the working-age groups - confirm with the analysis that uses it).
work_ages = ['25-34', '35-44', '45-54', '55-64']
df['work_age'] = df['ageRange'].isin(work_ages)

In [9]:
# Label every row by the period its 'series' value falls into. Both windows use
# strict comparisons, so the boundary values themselves are excluded. The
# 'During' label is driven by the after_period_* bounds. Rows outside both
# windows are labelled 'none'.
series_col = df['series']
is_before = (series_col > before_period_start) & (series_col < before_period_end)
is_during = (series_col > after_period_start) & (series_col < after_period_end)
df['period'] = np.select([is_before, is_during], ['Before', 'During'], default='none')

In [10]:
%store df

Stored 'df' (DataFrame)


In [11]:
%store users_df

Stored 'users_df' (DataFrame)


## Covid cases

In [12]:
# Source: Folkhälsomyndigheten - weekly nationwide confirmed cases, summed per month.
# https://www.folkhalsomyndigheten.se/smittskydd-beredskap/utbrott/aktuella-utbrott/covid-19/statistik-och-analyser/bekraftade-fall-i-sverige/
# Each entry is dated the 15th of its month.

data = json.loads("""[
    { "date": "2019-03-15", "value": 0 },
    { "date": "2019-04-15", "value": 0 },
    { "date": "2019-05-15", "value": 0 },
    { "date": "2019-06-15", "value": 0 },
    { "date": "2019-07-15", "value": 0 },
    { "date": "2019-08-15", "value": 0 },
    { "date": "2019-09-15", "value": 0 },
    { "date": "2019-10-15", "value": 0 },
    { "date": "2019-11-15", "value": 0 },
    { "date": "2019-12-15", "value": 0 },
    { "date": "2020-01-15", "value": 0 },
    { "date": "2020-02-15", "value": 14 },
    { "date": "2020-03-15", "value": 7159 },
    { "date": "2020-04-15", "value": 15527 },
    { "date": "2020-05-15", "value": 15961 },
    { "date": "2020-06-15", "value": 31951 },
    { "date": "2020-07-15", "value": 6673 },
    { "date": "2020-08-15", "value": 8285 },
    { "date": "2020-09-15", "value": 10233 },
    { "date": "2020-10-15", "value": 37527 },
    { "date": "2020-11-15", "value": 161065 },
    { "date": "2020-12-15", "value": 207718 },
    { "date": "2021-01-15", "value": 92437 },
    { "date": "2021-02-15", "value": 99336 }
  ]""")

# Turn each entry into a [parsed date, case count] pair.
rows = [[parser.parse(entry['date']), entry['value']] for entry in data]

covid_cases = pd.DataFrame(rows, columns=['date', 'cases'])

%store covid_cases

Stored 'covid_cases' (DataFrame)
