In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser
import requests
import json

# Route DataFrame.plot() calls through plotly rather than pandas' default backend.
pd.options.plotting.backend = "plotly"

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Data setup

## Fetch data

In [None]:
# Name of the analytics dataset to load; it is appended to the dataset endpoint
# URL in the next cell. Only the uncommented assignment takes effect — the
# commented-out names are alternative datasets (presumably kept for quick
# switching; confirm before pruning).
# dataset = '12-months-before-after-2021-12-13'
# dataset = '12-months-before-after-2021-12-14'
# dataset = '24-before-24-after-2021-12-14'
# dataset = 'calendar-months-48-updated-2021-12-20' # period data
# dataset = 'before-after-2021-11-15'
# dataset = 'before-after-weekdays-2021-12-16'
# dataset = '2019-vs-after-2020-03-16-2021-12-22' # weekdays data
# dataset = '2019-vs-after-2020-03-16-min-100-days-2021-12-22'

dataset = '2019-vs-after-2020-03-16-weekday-2021-12-22' # day data

In [None]:
# Endpoint for the selected dataset on the analytics service.
url = f'http://host.docker.internal:4000/analytics/dataset/{dataset}'

In [None]:
# Fetch the dataset body as text.
# - timeout=(connect, read) in seconds: without one, requests waits forever if
#   the analytics service is unreachable or stalls. The read limit is generous
#   because the dataset may take a while to produce.
# - raise_for_status(): surface 4xx/5xx responses here instead of letting an
#   error page reach the JSON-parsing cell and fail there with a confusing message.
http_response = requests.get(url, timeout=(10, 300))
http_response.raise_for_status()
response = http_response.text

In [None]:
# Decode the response text; the cells below read a 'rows' list from the result.
data = json.loads(response)

In [None]:
# Sanity check: number of top-level records in the fetched dataset.
len(data['rows'])

## Format data

In [None]:
# Flatten the nested payload: every entry in a record's 'rows' list becomes one
# output row carrying the record's attributes alongside the entry's own fields.
rows = []
for user_record in data['rows']:
    compare_date = user_record['compareDate']
    # Records without a usable comparison date are left out entirely.
    if compare_date in (None, 'Invalid date'):
        continue
    user_fields = [
        user_record['id'],
        user_record['gender'],
        user_record['ageRange'],
        user_record['occupation'],
        parser.parse(compare_date),
        user_record['stepsEstimate'],
    ]
    rows.extend(
        user_fields + [entry['series'], entry['hour'], entry['value']]
        for entry in user_record['rows']
    )

# The entry's 'value' field is exposed as the 'steps' column.
column_names = [
    'id', 'gender', 'ageRange', 'occupation', 'compareDate', 'stepsEstimate',
    'series', 'hour', 'steps',
]
df = pd.DataFrame(rows, columns=column_names)

In [None]:
# Show the assembled frame as the cell output.
df

In [None]:
# Disabled: would bind to `fdf` only the rows of df whose occupation is not null.
#fdf = df[(df.occupation.notnull())]

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics for the columns pandas' default describe() covers.
df.describe()

In [None]:
# Optional age-range filter: leave `value` empty to keep every row, or set it to
# an ageRange label to keep only the rows carrying exactly that label.
value = ''
if value:
    age_is_known = df.ageRange.notnull()
    age_matches = df.ageRange == value
    df = df[age_is_known & age_matches]

In [None]:
# Add a boolean `work_age` column: True where ageRange is one of the brackets
# listed in work_ages.
#
# DataFrame.assign returns a new frame instead of writing into df in place.
# When the age-range filter in the previous cell is active, df is a boolean-mask
# selection, and an in-place column write on it can trigger pandas'
# SettingWithCopyWarning; building a new frame avoids that and keeps this cell
# safe to re-run.
work_ages = ['25-34', '35-44','45-54','55-64']
df = df.assign(work_age=df.ageRange.isin(work_ages))

In [None]:
# Persist df via IPython's %store so it can be restored later with `%store -r df`.
%store df

In [None]:
# From Folkhälsomyndigheten, weekly cases nationwide summed up for each month https://www.folkhalsomyndigheten.se/smittskydd-beredskap/utbrott/aktuella-utbrott/covid-19/statistik-och-analyser/bekraftade-fall-i-sverige/
#
# Every entry is dated the 15th of its month.
#
# The payload is kept under its own name rather than reusing `data` / `rows`:
# those names hold the fetched dataset and its flattened rows, and overwriting
# them here made the earlier cells fail when re-run after this one.
covid_monthly_json = """[
    { "date": "2018-11-15", "value": 0 },
    { "date": "2018-12-15", "value": 0 },
    { "date": "2019-01-15", "value": 0 },
    { "date": "2019-02-15", "value": 0 },
    { "date": "2019-03-15", "value": 0 },
    { "date": "2019-04-15", "value": 0 },
    { "date": "2019-05-15", "value": 0 },
    { "date": "2019-06-15", "value": 0 },
    { "date": "2019-07-15", "value": 0 },
    { "date": "2019-08-15", "value": 0 },
    { "date": "2019-09-15", "value": 0 },
    { "date": "2019-10-15", "value": 0 },
    { "date": "2019-11-15", "value": 0 },
    { "date": "2019-12-15", "value": 0 },
    { "date": "2020-01-15", "value": 0 },
    { "date": "2020-02-15", "value": 14 },
    { "date": "2020-03-15", "value": 7159 },
    { "date": "2020-04-15", "value": 15527 },
    { "date": "2020-05-15", "value": 15961 },
    { "date": "2020-06-15", "value": 31951 },
    { "date": "2020-07-15", "value": 6673 },
    { "date": "2020-08-15", "value": 8285 },
    { "date": "2020-09-15", "value": 10233 },
    { "date": "2020-10-15", "value": 37527 },
    { "date": "2020-11-15", "value": 161065 },
    { "date": "2020-12-15", "value": 207718 },
    { "date": "2021-01-15", "value": 92437 },
    { "date": "2021-02-15", "value": 99336 },
    { "date": "2021-03-15", "value": 178998 },
    { "date": "2021-04-15", "value": 146055 },
    { "date": "2021-05-15", "value": 65547 },
    { "date": "2021-06-15", "value": 8215 },
    { "date": "2021-07-15", "value": 12613 },
    { "date": "2021-08-15", "value": 34179 },
    { "date": "2021-09-15", "value": 18942 },
    { "date": "2021-10-15", "value": 20232 },
    { "date": "2021-11-15", "value": 38639 }
  ]"""

# Build the frame in one step: 'value' is exposed as 'cases' and the ISO date
# strings are converted to datetimes column-wise.
covid_cases = (
    pd.DataFrame(json.loads(covid_monthly_json))
    .rename(columns={'value': 'cases'})
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .loc[:, ['date', 'cases']]
)

# Persist covid_cases via IPython's %store so it can be restored later with `%store -r covid_cases`.
%store covid_cases