### Import Libraries and Read Data

In [0]:
!pip install geopandas -qq
!pip install fiona -qq
!pip install folium -qq

In [0]:
!pip install selenium -qq
from selenium import webdriver
!wget https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-x86_64.tar.bz2
!tar xvjf phantomjs-2.1.1-linux-x86_64.tar.bz2
!cp phantomjs-2.1.1-linux-x86_64/bin/phantomjs /usr/local/bin

In [0]:
## Import Libraries
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
import geopandas
import folium
from io import StringIO

In [0]:
%matplotlib inline
plt.rcParams['figure.figsize'] = [15, 8]
pd.plotting.register_matplotlib_converters()
plt.style.use('seaborn-dark')

Fetch latest RKI Data

In [0]:
download_link = 'https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data'
# see https://www.arcgis.com/home/item.html?id=dd4580c810204019a7b8eb3e0b329dd6
# new source https://www.arcgis.com/home/item.html?id=f10774f1c63e40168479a1feb6c7ca74

In [0]:
req = requests.get(download_link)
rki_raw = pd.read_csv(StringIO(req.text))

# set index to datetime from 'Meldedatum'
rki_raw = rki_raw.set_index('Meldedatum')
rki_raw.index = pd.to_datetime(rki_raw.index)

Fetch GeoJSON to show 'Landkreise'

In [7]:
rki_geo_raw = geopandas.read_file('https://opendata.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0.geojson')
rki_geo = geopandas.read_file('https://opendata.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0.geojson')

rki_geo = rki_geo[['RS', 'geometry', 'EWZ']]

# add leading zero to RS to match 'IdLandkreis' from rki dataset
rki_geo['RS'] = rki_geo['RS'].astype(str).str.zfill(5)
rki_geo = rki_geo.set_index('RS')

rki_geo['geometry'] = rki_geo['geometry'].apply(lambda x: x.wkt).values

rki_geo.head()

Unnamed: 0_level_0,geometry,EWZ
RS,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,"POLYGON ((9.412664107904019 54.8226409068342, ...",89504
1002,"POLYGON ((10.1691581863434 54.4313767074573, 1...",247548
1003,"POLYGON ((10.8768349996186 53.9873651853629, 1...",217198
1004,"POLYGON ((9.99544612152712 54.1497192603092, 9...",79487
1051,MULTIPOLYGON (((8.69257691626515 54.0604453946...,133210


In [8]:
rki_raw.tail()

Unnamed: 0_level_0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,IdLandkreis,Datenstand,NeuerFall,NeuerTodesfall,Refdatum,NeuGenesen,AnzahlGenesen
Meldedatum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-03-29 00:00:00+00:00,16,Thüringen,LK Greiz,A35-A59,W,2,0,3883969,16076,"26.04.2020, 00:00 Uhr",0,-9,2020-03-25T00:00:00.000Z,0,2
2020-03-31 00:00:00+00:00,16,Thüringen,LK Greiz,A35-A59,W,1,0,3883970,16076,"26.04.2020, 00:00 Uhr",0,-9,2020-03-24T00:00:00.000Z,0,1
2020-03-31 00:00:00+00:00,16,Thüringen,LK Greiz,A35-A59,W,1,0,3883971,16076,"26.04.2020, 00:00 Uhr",0,-9,2020-03-27T00:00:00.000Z,0,1
2020-03-31 00:00:00+00:00,16,Thüringen,LK Greiz,A35-A59,W,1,0,3883972,16076,"26.04.2020, 00:00 Uhr",0,-9,2020-03-28T00:00:00.000Z,0,1
2020-04-01 00:00:00+00:00,16,Thüringen,LK Greiz,A35-A59,W,1,0,3883973,16076,"26.04.2020, 00:00 Uhr",0,-9,2020-03-26T00:00:00.000Z,0,1


In [0]:
def new_cases_by_date(rki_raw, rki_flag_column='Neuer Fall', rki_count_columns='AnzahlFall'):
    """
    rki_raw: raw DataFrame from Robert Koch Institute
    """
    condition = rki_raw[rki_flag_column].isin((0, 1))
    rki_series = rki_raw[condition].groupby(['IdLandkreis'])[rki_count_columns].sum().to_frame(name = rki_count_columns).reset_index()
    
    rki_series = rki_series[['IdLandkreis', 'AnzahlFall']]

    #join geodata
    rki_series['IdLandkreis'] = rki_series['IdLandkreis'].astype(str).str.zfill(5)
    rki_series = rki_series.set_index('IdLandkreis')
    rki_series = rki_series.join(rki_geo)

    return rki_series

Generate Dataset with cases from last 7 days by 100k for each period since 2020-03-01

In [10]:
start_date = pd.to_datetime('2020-03-01')
end_date = rki_raw.index.max().tz_convert(None)

frames = []

for j in pd.date_range(start_date, end_date - pd.to_timedelta(6,'d')):
    
    startrange = j.strftime('%Y-%m-%d')
    endrange = (j + pd.to_timedelta(6,'d')).strftime('%Y-%m-%d')
    rki_cases = new_cases_by_date(rki_raw[startrange:endrange], rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall')
    rki_cases['date'] = (j + pd.to_timedelta(6,'d'))
    rki_cases['RS'] = rki_cases.index
    rki_cases['cases_per_100k'] = rki_cases['AnzahlFall'] / rki_cases['EWZ'] * 100000

    rki_cases = rki_cases.set_index('date')
    frames.append(rki_cases)

#rki_death = new_cases_by_date(rki_raw, rki_flag_column='NeuerTodesfall', rki_count_columns='AnzahlTodesfall')
#rki_recovered = new_cases_by_date(rki_raw, rki_flag_column='NeuGenesen', rki_count_columns='AnzahlGenesen')

result = pd.concat(frames)


Unnamed: 0_level_0,AnzahlFall,geometry,EWZ,RS,cases_per_100k
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-04-25,3,"POLYGON ((11.2629847411749 50.8332784741453, 1...",106356,16073,2.820715
2020-04-25,5,"POLYGON ((11.6979790584767 51.0835208927217, 1...",83051,16074,6.020397
2020-04-25,11,"POLYGON ((11.8298950832029 50.7892225977439, 1...",80868,16075,13.602414
2020-04-25,87,"POLYGON ((12.1916662745102 50.9635817588172, 1...",98159,16076,88.63171
2020-04-25,6,"POLYGON ((12.3492688881673 51.0966461467599, 1...",90118,16077,6.657937


In [52]:
# max value to adjust bin
result['cases_per_100k'].max()

571.0029791459782

In [0]:
#delete generated data
!rm *.png
!rm last_7_days_maps.zip

Generate maps for each period

In [53]:
import folium
import branca
import selenium

for j in pd.date_range(start_date, end_date - pd.to_timedelta(6,'d')):

  #bins = list(result['cases_per_100k'].quantile([0, 0.25, 0.5, 0.75, 1]))
  bins = [0, 5, 25, 50, 100, 600]

  map_osm = folium.Map(attr='Robert Koch-Institut (RKI), dl-de/by-2-0', location=[51.3, 10.5], tiles='cartodbpositron', zoom_start=7)

  folium.Choropleth(
      geo_data=rki_geo_raw,
      data=result[(j + pd.to_timedelta(6,'d')):(j + pd.to_timedelta(6,'d'))],
      columns=['RS', 'cases_per_100k'],
      key_on='feature.properties.RS',
      fill_color='YlOrRd',
      fill_opacity=0.6,
      line_opacity=0.0,
      nan_fill_color='#f5f5f3',
      legend_name='cases per 100k (last 7 days)',
      bins=[float(x) for x in bins],
      smooth_factor = 0.1
  ).add_to(map_osm)

  legend_html = '<div style="position: fixed; bottom: 75px; left: 50%; margin-left: -350px; width: 700px; height: 20px; z-index:9999; font-size:42px;">&nbsp; ' + j.strftime('%Y-%m-%d') + ' - ' + (j + pd.to_timedelta(6,'d')).strftime('%Y-%m-%d') + '<br></div>'
  map_osm.get_root().html.add_child(folium.Element(legend_html))

  map_osm.save('map.html')

  # save map as png with headless browser
  driver = selenium.webdriver.PhantomJS()
  driver.set_window_size(1000, 1400)  # choose a resolution
  driver.get('map.html')

  driver.save_screenshot((j + pd.to_timedelta(6,'d')).strftime('%Y-%m-%d') + ".png")



In [54]:
!zip last_7_days_maps.zip *.png

updating: 2020-03-07.png (deflated 0%)
updating: 2020-03-08.png (deflated 0%)
updating: 2020-03-09.png (deflated 0%)
updating: 2020-03-10.png (deflated 0%)
updating: 2020-03-11.png (deflated 0%)
updating: 2020-03-12.png (deflated 0%)
updating: 2020-03-13.png (deflated 0%)
updating: 2020-03-14.png (deflated 0%)
updating: 2020-03-15.png (deflated 0%)
updating: 2020-03-16.png (deflated 0%)
updating: 2020-03-17.png (deflated 0%)
updating: 2020-03-18.png (deflated 0%)
updating: 2020-03-19.png (deflated 0%)
updating: 2020-03-20.png (deflated 0%)
updating: 2020-03-21.png (deflated 0%)
updating: 2020-03-22.png (deflated 0%)
updating: 2020-03-23.png (deflated 0%)
updating: 2020-03-24.png (deflated 0%)
updating: 2020-03-25.png (deflated 0%)
updating: 2020-03-26.png (deflated 0%)
updating: 2020-03-27.png (deflated 0%)
updating: 2020-03-28.png (deflated 0%)
updating: 2020-03-29.png (deflated 0%)
updating: 2020-03-30.png (deflated 0%)
updating: 2020-03-31.png (deflated 0%)
updating: 2020-04-01.png 

generate video from image sequence
https://gist.github.com/anguyen8/d0630b6aef6c1cd79b9a1341e88a573e

In [0]:
import folium
import branca

bins = [0, 5, 25, 50, 100, 500]
map_osm = folium.Map(attr="Robert Koch-Institut (RKI), dl-de/by-2-0", location=[51.3, 10.5], tiles='cartodbpositron', zoom_start=7)

#result.apply(lambda row:folium.GeoJson(row[1], fill_color=colorscale(row[0])).add_to(map_osm), axis=1)
folium.Choropleth(
    geo_data=rki_geo_raw,
    data=result['03-10-2020':'03-10-2020'],
    columns=['RS', 'cases_per_100k'],
    key_on='feature.properties.RS',
    fill_color='YlOrRd',
    fill_opacity=0.6,
    line_opacity=0.0,
    nan_fill_color='#f5f5f3',
    legend_name='cases per 100k',
    bins=[float(x) for x in bins],
    smooth_factor = 0.1
).add_to(map_osm)

legend_html = '<div style="position: fixed; bottom: 75px; left: 50%; margin-left: -350px; width: 700px; height: 20px; z-index:9999; font-size:42px;">&nbsp; ' + '03-10-2020 - 03-17-2020' + '<br></div>'
map_osm.get_root().html.add_child(folium.Element(legend_html))

map_osm

In [55]:
rki_raw_a00 = rki_raw[rki_raw['Altersgruppe'].str.match('A00-A04')]
rki_raw_a05 = rki_raw[rki_raw['Altersgruppe'].str.match('A05-A14')]
rki_raw_a15 = rki_raw[rki_raw['Altersgruppe'].str.match('A15-A34')]
rki_raw_a35 = rki_raw[rki_raw['Altersgruppe'].str.match('A35-A59')]
rki_raw_a60 = rki_raw[rki_raw['Altersgruppe'].str.match('A60-A79')]
rki_raw_a80 = rki_raw[rki_raw['Altersgruppe'].str.match('A80+')]

rki_cases_a00 = new_cases_by_date(rki_raw_a00, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A00-A04')
rki_cases_a05 = new_cases_by_date(rki_raw_a05, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A05-A14')
rki_cases_a15 = new_cases_by_date(rki_raw_a15, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A15-A34')
rki_cases_a35 = new_cases_by_date(rki_raw_a35, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A35-A59')
rki_cases_a60 = new_cases_by_date(rki_raw_a60, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A60-A79')
rki_cases_a80 = new_cases_by_date(rki_raw_a80, rki_flag_column='NeuerFall', rki_count_columns='AnzahlFall').rename('A80+')

rki_age_case = pd.concat([rki_cases_a00, rki_cases_a05, rki_cases_a15, rki_cases_a35, rki_cases_a60, rki_cases_a80], axis=1)

#rki_age_case.rename(columns_dict, axis=1, inplace=True)
#rki_age_case = rki_age_case.rename(index={0: 'A00-A04', 1: 'A05-A14',2: 'A15-A34',3: 'A35-A59',4: 'A60-A79',5: 'A80+'})
rki_age_case.tail()

#rki_age_case['03-05-2020':].plot.bar(stacked=True, colormap='Blues');


TypeError: ignored

In [0]:
rki = pd.concat([rki_cases, rki_death, rki_recovered], axis=1, sort=True)
columns_dict = {'AnzahlFall': 'NewCases', 'AnzahlTodesfall': 'NewDeath', 'AnzahlGenesen': 'NewRecovered'}
rki.rename(columns_dict, axis=1, inplace=True)
rki.fillna(value=0., inplace=True)
rki.tail()

In [0]:
rki['Cases'] = rki['NewCases'].cumsum()
rki['Death'] = rki['NewDeath'].cumsum()
rki['Recovered'] = rki['NewRecovered'].cumsum()
rki['ActiveCases'] = rki['Cases'] - rki['Recovered']
rki['Growth'] = rki['NewCases'] / rki['ActiveCases']
rki.tail(10)

In [0]:
rki.NewCases['03-05-2020':].plot.bar(stacked=True)

In [0]:
rki.Cases['03-01-2020':].plot.bar()

In [0]:
rki.Recovered['03-01-2020':].plot.bar()

In [0]:
rki.ActiveCases['03-01-2020':].plot.bar()

In [0]:
rki.Death['03-01-2020':].plot.bar()

In [0]:
rki.NewDeath['03-01-2020':].plot.bar()

In [0]:
start_date = '03-05-2020'
x = rki[start_date:].index

plt.bar(x, rki.Growth[start_date:], label='Growth')
plt.plot(x, rki.Growth.ewm(span=6,adjust=False).mean()[start_date:], label='Exponential Moving Average', color='red')
plt.legend(loc='best')
plt.title('Growth Germany')

In [0]:
rki.Growth['03-21-2020':].plot.bar()

In [0]:
start_date = '03-01-2020'
x = rki[start_date:].index

plt.bar(x, rki.NewCases[start_date:], label='New Cases')
plt.plot(x, rki.NewCases.ewm(span=3,adjust=False).mean()[start_date:], label='Exponential Moving Average', color='red')
plt.legend(loc='best')
plt.title('New Cases Germany')

In [0]:
rki.tail(20)