In [None]:
import io
from functools import lru_cache

import altair as alt
import pandas as pd
import requests

import matplotlib.pyplot as plt

In [None]:
data_url = "https://raw.githubusercontent.com/thohan88/covid19-nor-data/HEAD/data"

@lru_cache()
def download_dataset(
    path,
    parse_dates=("date",),
    data_url=data_url,
    timeout=30,
):
    """Download a CSV dataset from the covid19-nor-data archive.

    Results are memoised with ``lru_cache``: calling again with the same
    arguments returns the *same* DataFrame object without a new request,
    so copy the result before mutating it.

    Parameters
    ----------
    path : str
        Path of the CSV file, relative to ``data_url``.
    parse_dates : tuple, bool or other hashable, default ``("date",)``
        Forwarded to ``pandas.read_csv``. Because of ``lru_cache`` every
        argument must be hashable, so pass column names as a tuple rather
        than a list; a tuple is converted to a list before it reaches pandas.
    data_url : str
        Base URL that ``path`` is appended to.
    timeout : float or tuple, default 30
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking forever.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    """
    url = f"{data_url}/{path}"
    print(f"Downloading {url}")
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    print(f"Downloaded {len(r.content) // 1024}kB")
    # The default is a tuple to keep it immutable and hashable; hand pandas a list.
    if isinstance(parse_dates, tuple):
        parse_dates = list(parse_dates)
    return pd.read_csv(io.BytesIO(r.content), parse_dates=parse_dates)


# Fetch the "01_infected/msis/municipality_and_district.csv" table from the
# archive (its "date" column is parsed as dates by download_dataset's default)
# and display it as the cell output.
all_cases = download_dataset("01_infected/msis/municipality_and_district.csv")
all_cases

In [None]:
# Aggregate to one row per (fylke_name, date) pair by summing the
# "cases" and "population" columns over every row in that group.
# as_index=False keeps the group keys as ordinary columns.
cases = all_cases.groupby(["fylke_name", "date"], as_index=False)[
    ["cases", "population"]
].sum()
cases

In [None]:
# Discard rows whose fylke name contains "Ukjent" (unknown): a rate per 100k
# population does not make sense for them.
# .copy() makes the filtered result an independent frame, so the column
# assignments in later cells do not trigger SettingWithCopyWarning.
cases = cases[~cases.fylke_name.str.contains("Ukjent")].copy()
cases

In [None]:
# 'cases' is a cumulative sum per fylke;
# difference consecutive rows to recover the daily new case count.
# This relies on the rows of each fylke being in date order.
cumulative = cases["cases"]
daily = (
    cumulative.groupby(cases["fylke_name"]).diff()
    # the first row of each fylke has no predecessor, so its daily count
    # is the cumulative value itself
    .fillna(cumulative)
    .astype(int)
)

# assign() builds a new frame instead of writing into the filtered one,
# which keeps this cell idempotent and avoids SettingWithCopyWarning.
cases = cases.assign(**{"daily cases": daily})

cases

# Add per-100,000 population column

In [None]:
# per100k: daily new cases scaled to a population of 100,000.
# NOTE(review): the +1 on the population presumably guards against a
# division by zero — confirm it is still wanted.
cases["per100k"] = cases["daily cases"].mul(1e5).div(cases["population"].add(1))
cases

## Exercise

Plot daily cases per 100k population (the `per100k` column).

In [None]:
# Keep only the most recent days, capped at 5000 rows in total.
# `cases` is ordered by fylke and then by date, so a plain tail slice
# (cases[-5000:]) would keep the alphabetically last fylker rather than the
# latest dates. Instead, pick as many whole days as fit in the row budget.
# NOTE(review): 5000 presumably matches Altair's default row limit — confirm.
max_rows = 5000
days_to_keep = max(1, max_rows // max(1, cases["fylke_name"].nunique()))
recent_dates = cases["date"].drop_duplicates().nlargest(days_to_keep)
latest_cases = cases[cases["date"].isin(recent_dates)]

In [None]:
# Exercise placeholder: the trailing `...` is not valid Python — replace it
# with the rest of the chart specification before running this cell.
alt.Chart(latest_cases)...

We can also do the same with matplotlib