# Download Corona Data
This file downloads [this](https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data) current Corona data for Germany from the [RKI Website](https://www.rki.de/DE/Home/homepage_node.html).

**Note:** This is based on the Code from ExerciseSheet 2

In [1]:
# Package imports
import pandas as pd
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import datetime
from tqdm import tqdm

Download raw data from the RKI

In [2]:
# Link to current data of the RKI
url = "https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data"

# Read CSV data from URL
data_rki = pd.read_csv(url)
data_rki.head()

Unnamed: 0,FID,IdBundesland,Bundesland,Landkreis,Altersgruppe,Geschlecht,AnzahlFall,AnzahlTodesfall,Meldedatum,IdLandkreis,Datenstand,NeuerFall,NeuerTodesfall,Refdatum,NeuGenesen,AnzahlGenesen,IstErkrankungsbeginn,Altersgruppe2
0,1,1,Schleswig-Holstein,SK Flensburg,A00-A04,M,1,0,2020/09/30 00:00:00,1001,"20.01.2022, 00:00 Uhr",0,-9,2020/09/30 00:00:00,0,1,0,Nicht übermittelt
1,2,1,Schleswig-Holstein,SK Flensburg,A00-A04,M,1,0,2020/10/29 00:00:00,1001,"20.01.2022, 00:00 Uhr",0,-9,2020/10/29 00:00:00,0,1,0,Nicht übermittelt
2,3,1,Schleswig-Holstein,SK Flensburg,A00-A04,M,1,0,2020/11/03 00:00:00,1001,"20.01.2022, 00:00 Uhr",0,-9,2020/11/03 00:00:00,0,1,0,Nicht übermittelt
3,4,1,Schleswig-Holstein,SK Flensburg,A00-A04,M,1,0,2020/11/20 00:00:00,1001,"20.01.2022, 00:00 Uhr",0,-9,2020/11/19 00:00:00,0,1,1,Nicht übermittelt
4,5,1,Schleswig-Holstein,SK Flensburg,A00-A04,M,1,0,2020/11/23 00:00:00,1001,"20.01.2022, 00:00 Uhr",0,-9,2020/11/18 00:00:00,0,1,1,Nicht übermittelt


In [3]:
# Create new dataframe and sort by date
data_by_date = data_rki.copy()
data_by_date["Meldedatum"] = pd.to_datetime(data_by_date["Meldedatum"])
data_by_date.sort_values(by="Meldedatum", inplace=True)

Generate List of all Counties and Dates

In [4]:
counties = data_rki["Landkreis"].unique()
print(counties[:10])

dates = pd.date_range("2020-01-02", datetime.date.today(), freq="1D")
print(dates[:10])

['SK Flensburg' 'SK Kiel' 'SK Lübeck' 'SK Neumünster' 'LK Dithmarschen'
 'LK Herzogtum Lauenburg' 'LK Nordfriesland' 'LK Ostholstein'
 'LK Pinneberg' 'LK Plön']
DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05',
               '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09',
               '2020-01-10', '2020-01-11'],
              dtype='datetime64[ns]', freq='D')


Create a slimmed down dataset, with all the interesting features

In [5]:
# Cumulative case numbers over time
data_rki_cases = pd.DataFrame()
for county in tqdm(counties):
    data = data_by_date[(data_by_date["Landkreis"] == county) & (data_by_date["NeuerFall"] >= 0)]
    
    id_county = data["IdLandkreis"].iloc[0]
    name_county = data["Landkreis"].iloc[0]
    id_state = data["IdBundesland"].iloc[0]
    name_state = data["Bundesland"].iloc[0]
    date = data["Meldedatum"].iloc[0]
    
    data = data.groupby("Meldedatum").sum()
    
    date = data.index
    cases = data["AnzahlFall"]
    
    
    deaths = data_by_date[(data_by_date["Landkreis"] == county) & (data_by_date["NeuerTodesfall"] >= 0)] \
            .groupby("Meldedatum").sum()["AnzahlTodesfall"]
    
    data_county = pd.DataFrame({
        "id_county": id_county,
        "name_county": name_county,
        "id_state": id_state,
        "name_state": name_state,
        "cases": cases,
        "deaths": deaths,
    }, index = date)

    
    data_county = data_county.resample("1D").asfreq().reindex(dates).fillna(value={"cases": 0, "deaths": 0}).bfill().ffill()
    data_county["date"] = data_county.index
    data_county.index = range(len(data_county))
    data_county["cum_cases"] = data_county.cases.cumsum()
    data_county["cum_deaths"] = data_county.deaths.cumsum()
    
    data_rki_cases = data_rki_cases.append(data_county, ignore_index=True)

data_rki_cases.head()

  0%|          | 0/411 [00:00<?, ?it/s]


AttributeError: 'DataFrame' object has no attribute 'incidence'

Finally we save the dataset

In [None]:
data_rki_cases.to_csv("../dat/CoronaData.csv")