## Corona cases and deaths for Germany

data from https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0

In [3]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import requests
from os import walk
import dateutil.parser

In [4]:
def load_from_api():
    more_results = True
    dfs = []
    i = 0
    n_records = 5000
    while more_results:
        url = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/query?'\
                'where=1%3D1'\
                '&outFields=*'\
                '&f=json'\
                f'&resultRecordCount={n_records}'\
                f'&resultOffset={i*n_records}'
        r = requests.get(url)
        payload = r.json()
        attributes = [record['attributes'] for record in payload['features']]
        df = pd.json_normalize(attributes)
        more_results = ('exceededTransferLimit' in payload)
        if more_results and df.shape[0] < n_records:
            raise Exception('Fewer records returned than expected.')
        i += 1
        print('.',end='')
        dfs.append(df)
    df_res = pd.concat(dfs)
    return df_res

def file_of_today_exists():
    (_, _, filenames) = next(walk('./RKI data'))
    for name in filenames:
        if name.startswith('RKI_COVID19') and \
                dateutil.parser.parse(name[12:22]).date() == datetime.now().date():
            return True, name
    return False, None

def load_RKI(once_per_day=True):
    exists, path = file_of_today_exists()
    if exists and once_per_day:
        path = './RKI data/' + path
        print(f'read file from path: {path}')
        df_res = pd.read_csv(path)
    else:
        df_res = load_from_api()
        df_res['Meldedatum'] = pd.to_datetime(df_res['Meldedatum'], unit='ms')
        df_res['Refdatum'] = pd.to_datetime(df_res['Refdatum'], unit='ms')
        df_res.to_csv('./RKI data/RKI_COVID19_{:%Y-%m-%d_%H}h.csv'.format(datetime.now()))
    return df_res

df_res = load_RKI()
df_res['Meldedatum'].max()

..........................

Timestamp('2020-05-03 00:00:00')

In [37]:
load_RKI(False)

.....................

Unnamed: 0,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,ObjectId,Meldedatum,IdLandkreis,Datenstand,NeuerFall,NeuerTodesfall,Refdatum,NeuGenesen,AnzahlGenesen
0,1,Schleswig-Holstein,SK Flensburg,A15-A34,M,1,0,2506526,2020-03-14,01001,"17.04.2020, 00:00 Uhr",0,-9,2020-03-16,0,1
1,1,Schleswig-Holstein,SK Flensburg,A15-A34,M,1,0,2506527,2020-03-19,01001,"17.04.2020, 00:00 Uhr",0,-9,2020-03-13,0,1
2,1,Schleswig-Holstein,SK Flensburg,A15-A34,M,1,0,2506528,2020-03-19,01001,"17.04.2020, 00:00 Uhr",0,-9,2020-03-16,0,1
3,1,Schleswig-Holstein,SK Flensburg,A15-A34,M,1,0,2506529,2020-03-21,01001,"17.04.2020, 00:00 Uhr",0,-9,2020-03-13,0,1
4,1,Schleswig-Holstein,SK Flensburg,A15-A34,M,1,0,2506530,2020-03-27,01001,"17.04.2020, 00:00 Uhr",0,-9,2020-03-22,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1182,16,Thüringen,LK Altenburger Land,A60-A79,W,1,0,2607708,2020-04-02,16077,"17.04.2020, 00:00 Uhr",0,-9,2020-03-31,-9,0
1183,16,Thüringen,LK Altenburger Land,A60-A79,W,1,0,2607709,2020-04-05,16077,"17.04.2020, 00:00 Uhr",0,-9,2020-03-31,0,1
1184,16,Thüringen,LK Altenburger Land,A60-A79,W,1,0,2607710,2020-04-16,16077,"17.04.2020, 00:00 Uhr",1,-9,2020-04-15,-9,0
1185,16,Thüringen,LK Altenburger Land,A80+,M,1,0,2607711,2020-03-24,16077,"17.04.2020, 00:00 Uhr",0,-9,2020-03-16,0,1
