In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser
import requests
import json
from ipywidgets import widgets 

# Route DataFrame.plot() calls through plotly instead of the default matplotlib backend.
pd.options.plotting.backend = "plotly"

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Execute global.ipynb in this kernel so the names it defines become available here.
# NOTE(review): presumably this is where data_to_df and estimates_data_to_df
# (used in later cells) come from - confirm.
%run global.ipynb

# Data setup

## Fetch data

In [2]:
import os

# Load the raw JSON snapshots from ./data (resolved against the current
# working directory).
#
# The files are decoded explicitly as UTF-8: the records contain non-ASCII
# text (e.g. Swedish occupation names), and relying on the platform default
# encoding would garble or reject them on systems whose default is not UTF-8.
with open(os.path.join(os.getcwd(), 'data/dataset-2-2-day-2023-03-07'), 'r', encoding='utf-8') as f:
    day_data = json.load(f)

# The week-level export is split over several files; concatenate them in order.
# Assumes each file holds a JSON array (list.extend is used) - TODO confirm.
week_data = []
datasets = ['data/dataset-2-2-week-2023-03-07-1', 'data/dataset-2-2-week-2023-03-07-2', 'data/dataset-2-2-week-2023-03-07-3']
for dataset in datasets:
    with open(os.path.join(os.getcwd(), dataset), 'r', encoding='utf-8') as f:
        week_data.extend(json.load(f))

In [2]:
import os

# Load the step/estimate export and convert it to a DataFrame.
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open(os.path.join(os.getcwd(), 'data/steps_with_est.json'), 'r', encoding='utf-8') as f:
    estimates_data = json.load(f)

# estimates_data_to_df is not defined in this notebook section.
# NOTE(review): presumably provided by global.ipynb - confirm.
estimates_df = estimates_data_to_df(estimates_data)

# Last expression of the cell: render the frame as the cell output.
estimates_df

Unnamed: 0,date,steps,userId,gender,ageRange,created,stepsEstimate
0,2017-09-18,2421.0,5f772fc063993d0008607bb4,male,25-34,2020-10-02 13:48:48.153000+00:00,0.25
1,2017-09-19,5718.0,5f772fc063993d0008607bb4,male,25-34,2020-10-02 13:48:48.153000+00:00,0.25
2,2017-09-20,4542.0,5f772fc063993d0008607bb4,male,25-34,2020-10-02 13:48:48.153000+00:00,0.25
3,2017-09-21,6021.0,5f772fc063993d0008607bb4,male,25-34,2020-10-02 13:48:48.153000+00:00,0.25
4,2017-09-22,2970.0,5f772fc063993d0008607bb4,male,25-34,2020-10-02 13:48:48.153000+00:00,0.25
...,...,...,...,...,...,...,...
1361803,2023-05-22,165.0,64784cba2562860008d7f574,female,45-54,2023-06-01 07:46:02.263000+00:00,0.00
1361804,2023-05-25,18.0,64784cba2562860008d7f574,female,45-54,2023-06-01 07:46:02.263000+00:00,0.00
1361805,2023-05-26,4687.0,64784cba2562860008d7f574,female,45-54,2023-06-01 07:46:02.263000+00:00,0.00
1361806,2023-05-29,2481.0,64784cba2562860008d7f574,female,45-54,2023-06-01 07:46:02.263000+00:00,0.00


In [3]:
# data_to_df yields an indexable result: position 0 is kept as the
# week-level frame, position 1 as the matching per-user frame.
weekdata = data_to_df(week_data)
week_df, week_users_df = weekdata[0], weekdata[1]

In [4]:
# data_to_df yields an indexable result: position 0 is kept as the
# day-level frame, position 1 as the matching per-user frame.
daydata = data_to_df(day_data)
day_df, day_users_df = daydata[0], daydata[1]

In [5]:
# Persist the week-level frames with IPython's %store so that another
# notebook/kernel can restore them with `%store -r`.
%store week_df
%store week_users_df

Stored 'week_df' (DataFrame)
Stored 'week_users_df' (DataFrame)


In [4]:
# Persist estimates_df with IPython's %store (restore elsewhere with `%store -r`).
# NOTE(review): storing day_df / day_users_df is currently disabled below -
# confirm whether that is intentional.
#%store day_df
# %store day_users_df
%store estimates_df

Stored 'estimates_df' (DataFrame)


## Covid cases

In [7]:
# Confirmed COVID-19 cases in Sweden, nationwide: weekly counts summed up per month.
# Each month is represented by a single record dated the 15th of that month.
# Source: Folkhälsomyndigheten
# https://www.folkhalsomyndigheten.se/smittskydd-beredskap/utbrott/aktuella-utbrott/covid-19/statistik-och-analyser/bekraftade-fall-i-sverige/

data = json.loads("""[
    { "date": "2019-03-15", "value": 0 },
    { "date": "2019-04-15", "value": 0 },
    { "date": "2019-05-15", "value": 0 },
    { "date": "2019-06-15", "value": 0 },
    { "date": "2019-07-15", "value": 0 },
    { "date": "2019-08-15", "value": 0 },
    { "date": "2019-09-15", "value": 0 },
    { "date": "2019-10-15", "value": 0 },
    { "date": "2019-11-15", "value": 0 },
    { "date": "2019-12-15", "value": 0 },
    { "date": "2020-01-15", "value": 0 },
    { "date": "2020-02-15", "value": 14 },
    { "date": "2020-03-15", "value": 7159 },
    { "date": "2020-04-15", "value": 15527 },
    { "date": "2020-05-15", "value": 15961 },
    { "date": "2020-06-15", "value": 31951 },
    { "date": "2020-07-15", "value": 6673 },
    { "date": "2020-08-15", "value": 8285 },
    { "date": "2020-09-15", "value": 10233 },
    { "date": "2020-10-15", "value": 37527 },
    { "date": "2020-11-15", "value": 161065 },
    { "date": "2020-12-15", "value": 207718 },
    { "date": "2021-01-15", "value": 92437 },
    { "date": "2021-02-15", "value": 99336 }
  ]""")

# Parse the dates with an explicit ISO format rather than a format-guessing
# parser: a malformed or ambiguous date raises ValueError instead of being
# silently interpreted.
rows = [[datetime.strptime(record['date'], '%Y-%m-%d'), record['value']] for record in data]

# One row per month: 'date' (datetime) and 'cases' (monthly case count).
covid_cases = pd.DataFrame(rows, columns=['date', 'cases'])

# Persist covid_cases with IPython's %store so that another notebook/kernel
# can restore it with `%store -r`.
%store covid_cases

Stored 'covid_cases' (DataFrame)


In [8]:
# Inspect day_df; the rendered output is truncated to the first and last rows.
day_df

Unnamed: 0,id,gender,ageRange,occupation,compareDate,stepsEstimate,series,hour,steps,work_age,period
0,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,10,74.400000,False,Before
1,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,11,113.466667,False,Before
2,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,12,267.066667,False,Before
3,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,13,178.866667,False,Before
4,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,14,323.000000,False,Before
...,...,...,...,...,...,...,...,...,...,...,...
694051,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,05,10.445672,True,During
694052,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,06,364.363903,True,During
694053,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,07,979.696745,True,During
694054,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,08,424.530925,True,During


In [10]:
# TODO: create a simplified users DataFrame from day_df (this cell has no code yet; the output below is still day_df).



Unnamed: 0,id,gender,ageRange,occupation,compareDate,stepsEstimate,series,hour,steps,work_age,period
0,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,10,74.400000,False,Before
1,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,11,113.466667,False,Before
2,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,12,267.066667,False,Before
3,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,13,178.866667,False,Before
4,5ee39a5f5e886e000876182a,,,,2020-03-16,,2019-12-16,14,323.000000,False,Before
...,...,...,...,...,...,...,...,...,...,...,...
694051,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,05,10.445672,True,During
694052,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,06,364.363903,True,During
694053,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,07,979.696745,True,During
694054,61af75effdde7a000853faad,Female,35-44,tjänsteman kommun,2020-03-16,-0.2,2021-02-16,08,424.530925,True,During
