<a href="https://colab.research.google.com/github/jfexbrayat/bokeh-covid/blob/main/bokeh_covid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Description
This notebook is used to display latest Covid incidence number reported by the Robert Koch Institute for Germany. It displays these data at the Landkreis level as an interactive map using `bokeh`.

The notebook downloads shapefile with Landkreis boundaries if this is not found when first running. 

Incidence data are fetched from the `Robert Koch-Institut API (v2)` with documentation available at https://api.corona-zahlen.org/docs/

In [1]:
# these two packages are not available in Google colab by default
!pip install pandas-bokeh geopandas

Collecting pandas-bokeh
  Downloading https://files.pythonhosted.org/packages/c6/08/a1fb196d9a58893bbf67b13a7db3973f39744f9bea0dca25a31656660569/pandas_bokeh-0.5.5-py2.py3-none-any.whl
Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/d7/bf/e9cefb69d39155d122b6ddca53893b61535fa6ffdad70bf5ef708977f53f/geopandas-0.9.0-py2.py3-none-any.whl (994kB)
[K     |████████████████████████████████| 1.0MB 7.8MB/s 
Collecting fiona>=1.8
[?25l  Downloading https://files.pythonhosted.org/packages/ea/2a/404b22883298a3efe9c6ef8d67acbf2c38443fa366ee9cd4cd34e17626ea/Fiona-1.8.19-cp37-cp37m-manylinux1_x86_64.whl (15.3MB)
[K     |████████████████████████████████| 15.3MB 237kB/s 
[?25hCollecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/b1/72/d52e9ca81caef056062d71991b0e9b1d16af042245627c5d0e4916a36c4f/pyproj-3.0.1-cp37-cp37m-manylinux2010_x86_64.whl (6.5MB)
[K     |████████████████████████████████| 6.5MB 42.7MB/s 
Collecting click-plugins>=1

In [2]:
# imports
import geopandas as gpd
import pandas as pd
import json
import requests
import bokeh
import pandas_bokeh
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile

In [3]:
# this points bokeh to use the notebook
pandas_bokeh.output_notebook()

In [4]:
# get data from the API
rki_api = requests.get("https://api.corona-zahlen.org/districts/")
rki_json = json.loads(rki_api.text)

In [5]:
# add Berlin data
rki_berlin = requests.get('https://api.corona-zahlen.org/states/BE')
rki_berlin_json = json.loads(rki_berlin.text)
rki_json['data']['11000'] = rki_berlin_json['data']['BE']
rki_json['data']['11000']['county'] = 'Berlin'

In [6]:
# look at how the data looks like
rki_json['data']['11000']

{'abbreviation': 'BE',
 'cases': 162778,
 'casesPer100k': 4435.983083212358,
 'casesPerWeek': 4952,
 'county': 'Berlin',
 'deaths': 3174,
 'deathsPerWeek': 4,
 'delta': {'cases': 1135, 'deaths': 6, 'recovered': 988},
 'id': 11,
 'name': 'Berlin',
 'population': 3669491,
 'recovered': 149407,
 'weekIncidence': 134.95059669038565}

In [7]:
# create a dataframe with only variables of interest, indeed by landkreis
rki_df = pd.DataFrame(index=rki_json['data'].keys())
for key, val in rki_json['data'].items():
    for var in ['name', 'cases', 'deaths', 'weekIncidence', 'casesPer100k']:
        if var in val:
            rki_df.loc[key, var] = val[var]  

In [8]:
rki_df.tail()

Unnamed: 0,name,cases,deaths,weekIncidence,casesPer100k
9777,Ostallgäu,6232.0,119.0,207.533538,4414.160445
9778,Unterallgäu,5859.0,128.0,198.842722,4031.209363
9779,Donau-Ries,5157.0,150.0,259.375257,3854.749856
9780,Oberallgäu,5310.0,67.0,157.043229,3403.671607
11000,Berlin,162778.0,3174.0,134.950597,4435.983083


In [9]:
kreise_shp = Path('data/vg1000_01-01.utm32s.shape.ebenen/vg1000_ebenen_0101/VG1000_KRS.shp')
if not kreise_shp.exists():
    print("Downloading shapes")
    r = requests.get('https://daten.gdz.bkg.bund.de/produkte/vg/vg1000_ebenen_0101/aktuell/vg1000_01-01.utm32s.shape.ebenen.zip')

    with ZipFile(BytesIO(r.content)) as z:   
        z.extractall(path='data')

kreise_gdf = gpd.read_file(kreise_shp)

Downloading shapes


In [10]:
kreise_gdf["BEZGEN"] = kreise_gdf["BEZ"] + ' ' + kreise_gdf['GEN']

In [11]:
# assert test that all Kreise in the shapefile can be link to some RKI data
assert (kreise_gdf['AGS'].isin(rki_df.index)).sum() == kreise_gdf.shape[0]

In [12]:
# merge with incidence data
kreise_gdf = pd.merge(kreise_gdf, rki_df, left_on="AGS", right_index=True)

In [13]:
# quick look at the head
kreise_gdf.head()

Unnamed: 0,ADE,GF,BSG,ARS,AGS,SDV_ARS,GEN,BEZ,IBZ,BEM,NBD,SN_L,SN_R,SN_K,SN_V1,SN_V2,SN_G,FK_S3,NUTS,ARS_0,AGS_0,WSK,DEBKG_ID,RS,SDV_RS,RS_0,geometry,BEZGEN,name,cases,deaths,weekIncidence,casesPer100k
0,4,4,1,1001,1001,10010000000,Flensburg,Kreisfreie Stadt,40,--,ja,1,0,1,0,0,0,R,DEF01,10010000000,1001000,2008-01-01,DEBKGDL1000055UK,1001,10010000000,10010000000,"POLYGON ((527845.389 6073764.748, 527876.357 6...",Kreisfreie Stadt Flensburg,Flensburg,2152.0,39.0,33.272703,2386.761901
401,4,2,1,1001,1001,10010000000,Flensburg,Kreisfreie Stadt,40,--,ja,1,0,1,0,0,0,R,DEF01,10010000000,1001000,2008-01-01,DEBKGDL1000055V2,1001,10010000000,10010000000,"POLYGON ((531016.578 6076768.422, 531633.213 6...",Kreisfreie Stadt Flensburg,Flensburg,2152.0,39.0,33.272703,2386.761901
1,4,4,1,1002,1002,10020000000,Kiel,Kreisfreie Stadt,40,--,ja,1,0,2,0,0,0,R,DEF02,10020000000,1002000,2006-01-01,DEBKGDL1000055TK,1002,10020000000,10020000000,"POLYGON ((577156.386 6029965.651, 577409.884 6...",Kreisfreie Stadt Kiel,Kiel,4553.0,95.0,89.548368,1844.858465
402,4,2,1,1002,1002,10020000000,Kiel,Kreisfreie Stadt,40,--,ja,1,0,2,0,0,0,R,DEF02,10020000000,1002000,2006-01-01,DEBKGDL1000055WQ,1002,10020000000,10020000000,"MULTIPOLYGON (((574785.057 6024466.026, 574806...",Kreisfreie Stadt Kiel,Kiel,4553.0,95.0,89.548368,1844.858465
2,4,4,1,1003,1003,10030000000,Lübeck,Kreisfreie Stadt,40,--,ja,1,0,3,0,0,0,R,DEF03,10030000000,1003000,2006-02-01,DEBKGDL1000055W1,1003,10030000000,10030000000,"POLYGON ((623679.404 5980679.433, 623686.814 5...",Kreisfreie Stadt Lübeck,Lübeck,5193.0,93.0,53.110423,2398.281993


## 

In [14]:
# display data on an interactive map
kreise_gdf.plot_bokeh(
    category='weekIncidence', 
    hovertool=True,
    colormap=bokeh.palettes.magma(256)[::-1],
    hovertool_string=[
        ('', '@BEZGEN'),
        ('Incidence', '@weekIncidence'),
        ('Cases', '@cases'),
        ('Deaths', '@deaths'),
        ('Cases per 100k', '@casesPer100k{ }')
    ],
    legend=False,
    colormap_range=(0, 400),
    figsize=(900, 900)
);