# Gesundheitsministerium and ECDC Data

## Setup

### Imports

In [31]:
%matplotlib notebook

import logging
import calendar
import numpy as np
from datetime import timedelta
from matplotlib import pyplot as plt
from helper import *
from austria import Austria
AT = Austria()
logging.basicConfig(level=logging.INFO)
plt.style.use('seaborn-darkgrid')

df_ecdc = ecdc()
logging.info('ECDC: \n%s', df_ecdc.columns)

df_at = at()
logging.info('AT: \n%s', df_at.columns);


def plot_Austria(roll_days=7, log=False):
    df = df_at
    f,ax = plt.subplots()
    ax.plot(df.time, df['tägliche Erkrankungen'].rolling(roll_days).mean(), label=f'{roll_days} day average', marker='o', markersize=4, linestyle='--')
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    plt.xticks(rotation=45)
    ax.bar(df.time, df['tägliche Erkrankungen'],label='recorded',  alpha=0.6, color='C1')
    plt.legend(loc='best')
    plt.title('Positive COVID tests')
    if log:
        plt.yscale('log')
    plt.tight_layout()
    return f


INFO:root:ECDC: 
Index(['dateRep', 'day', 'month', 'year', 'cases', 'deaths',
       'countriesAndTerritories', 'geoId', 'countryterritoryCode',
       'popData2019', 'continentExp',
       'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000'],
      dtype='object')
INFO:root:AT: 
Index(['time', 'tägliche Erkrankungen', 'Timestamp', 'weekday'], dtype='object')


## Weekday Bias

In [32]:
f, ax = plt.subplots(nrows=2, sharex=True)
df_at.groupby(['weekday']).sum()['tägliche Erkrankungen'].plot(ax=ax[0], marker='o')
ax[0].set_ylabel('Reported positive tests (Total)')
df_at.groupby(['weekday']).median()['tägliche Erkrankungen'].plot(ax=ax[1], marker='o')
ax[1].set_ylabel('Reported positive tests (Median)')
ax[1].set_xlabel('Day of the week')
ax[1].set_xticks([0,1,2,3,4,5,6])
ax[1].set_xticklabels(WEEKDAYS, rotation=40)
plt.tight_layout()

<IPython.core.display.Javascript object>

## Comparison with ECDC data

### Number of cases

In [30]:
df_at['tägliche Erkrankungen'].sum(), df_ecdc[df_ecdc.geoId=='AT'].cases.sum()


(56310, 55983)

### Plots overlaid

In [31]:
f,ax = plt.subplots()
# ax.bar(df_at.time, df_at['tägliche Erkrankungen'],label='recorded',  alpha=0.6, color='C1')

ax.plot(df_at.time, df_at['tägliche Erkrankungen'].rolling(7).mean(),  marker='o', markersize=4, linestyle='--', color='C1')

ax.bar(df_at.time, df_at['tägliche Erkrankungen'],label='AT',  alpha=0.6, color='C1')
selection = df_ecdc[df_ecdc.geoId=='AT'].sort_values(by=['year','month','day'], ascending=True)
ax.bar(selection.dateRep, selection['cases'],label='ECDC',  alpha=0.6, color='C0')
ax.plot(selection.dateRep, selection['cases'].rolling(7).mean(), marker='o', markersize=4, linestyle='--',color='C0')
# plt.yscale('log')

ax.xaxis.set_major_locator(plt.MaxNLocator(20));

plt.xticks(rotation=45);
plt.legend(loc='best')
plt.tight_layout()



<IPython.core.display.Javascript object>

### Last 5 days

In [32]:
df_ecdc[df_ecdc.geoId=='AT'].sort_values('dateRep')[['dateRep', 'cases', 'deaths','popData2019', 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000' ]].tail(6)

Unnamed: 0,dateRep,cases,deaths,popData2019,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000
2596,2020-10-07,549,22,8858775.0,118.955499
2595,2020-10-08,947,4,8858775.0,122.183936
2594,2020-10-09,1221,9,8858775.0,128.200569
2593,2020-10-10,1181,4,8858775.0,129.442276
2592,2020-10-11,901,1,8858775.0,132.580408
2591,2020-10-12,1298,6,8858775.0,141.294931


In [33]:
df_at.sort_values('time').tail(5)

Unnamed: 0,time,tägliche Erkrankungen,Timestamp,weekday
225,2020-10-07,1285,2020-10-12T14:02:01,2
226,2020-10-08,1114,2020-10-12T14:02:01,3
227,2020-10-09,1130,2020-10-12T14:02:01,4
228,2020-10-10,817,2020-10-12T14:02:01,5
229,2020-10-11,787,2020-10-12T14:02:01,6


In [6]:
df_at.time.max()

Timestamp('2020-10-10 00:00:00')

In [7]:
df_at.agg(
        {'tägliche Erkrankungen': ['sum', last_7_days_sum,  rolling_avg, latest,  'max' ],
        'time': ['min', 'max']} )


Unnamed: 0,tägliche Erkrankungen,time
last_7_days_sum,6911.0,NaT
latest,743.0,NaT
max,1278.0,2020-10-10
min,,2020-02-25
rolling_avg,987.0,NaT
sum,55426.0,NaT


In [8]:
overview(df_ecdc[df_ecdc.geoId=='AT'])

Unnamed: 0_level_0,cases,cases,cases,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Austria,1052,1298,1298,864,51,7,6,31


## Number of Tests and Positives

In [4]:
case_numbers = AT.fall_zählen
case_timeline = AT.fälle_timeline_gkz

### Vienna

In [5]:
wien_cases = case_numbers[case_numbers.Bundesland=='Wien']
wien_timeline = case_timeline[case_timeline.Bezirk=='Wien']


In [35]:
wien_cases.iloc[0].MeldeDatum

Timestamp('2020-04-01 00:00:00')

In [9]:
f, ax = plt.subplots()
ax.plot(wien_cases.MeldeDatum, wien_cases.TestGesamt.diff().rolling(7).mean(), label='Tests')
ax.plot(wien_timeline.Time, wien_timeline.AnzahlFaelle.rolling(7).mean(), label='Cases')
ax.xaxis.set_major_locator(plt.MaxNLocator(20))
plt.legend(loc='best')
plt.yscale('log')

plt.xticks(rotation=45);
plt.tight_layout()

<IPython.core.display.Javascript object>

In [19]:
f, ax = plt.subplots()
ax.plot(wien_cases.MeldeDatum, wien_cases.FZHosp.rolling(7).mean(), label='Hospitalisations')
ax.plot(wien_cases.MeldeDatum, wien_cases.FZICU.rolling(7).mean(), label='ICU Occupancy')
ax.plot(wien_timeline.Time, wien_timeline.AnzahlFaelle.rolling(7).mean(), label='Positive tests')
ax.plot(wien_cases.MeldeDatum, wien_cases.TestGesamt.diff().rolling(7).mean(), label='Total Tests')

# ax.plot(df_at.time, df_at['tägliche Erkrankungen'].rolling(7).mean(), label='Epicurve')
# plt.yscale('log')
plt.legend(loc='best')
#  Index(['Time', 'Bezirk', 'GKZ', 'AnzEinwohner', 'AnzahlFaelle',
#        'AnzahlFaelleSum', 'AnzahlFaelle7Tage', 'SiebenTageInzidenzFaelle',
#        'AnzahlTotTaeglich', 'AnzahlTotSum', 'AnzahlGeheiltTaeglich',
#        'AnzahlGeheiltSum'],

#  Index(['Meldedat', 'TestGesamt', 'MeldeDatum', 'FZHosp', 'FZICU', 'FZHospFree','FZICUFree', 'BundeslandID', 'Bundesland'],


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7f8813159ee0>

In [20]:
df_at.columns

Index(['time', 'tägliche Erkrankungen', 'Timestamp', 'weekday'], dtype='object')

### Ratio of positives to tests

In [21]:
# ax.plot(wien_cases.MeldeDatum, wien_cases.FZHosp.rolling(7).mean(), label='Hospitalisations')
# ax.plot(wien_cases.MeldeDatum, wien_cases.FZICU.rolling(7).mean(), label='ICU Occupancy')
# ax.plot(wien_timeline.Time, wien_timeline.AnzahlFaelle.rolling(7).mean(), label='Positive tests')
# ax.plot(wien_cases.MeldeDatum, wien_cases.TestGesamt.diff().rolling(7).mean(), label='Total Tests')
f,ax=plt.subplots()
ratio = np.array(wien_timeline[wien_timeline.Time.isin(wien_cases.MeldeDatum)].AnzahlFaelle)/np.array(wien_cases.TestGesamt)
ax.plot(wien_cases.MeldeDatum,ratio)
plt.yscale('log')

plt.xticks(rotation=45);
plt.tight_layout()

<IPython.core.display.Javascript object>

  ratio = np.array(wien_timeline[wien_timeline.Time.isin(wien_cases.MeldeDatum)].AnzahlFaelle)/np.array(wien_cases.TestGesamt)


In [22]:
len(wien_cases.TestGesamt.diff())

194

In [23]:
np.array(wien_cases.TestGesamt)

array([     0,  18880,  19618,  20740,  22144,  22798,  23659,  24869,
        26114,  27644,  28912,  29925,  30461,  31391,  32660,  33607,
        35603,  37256,  37256,  37750,  39880,  47541,  45520,  47206,
        48607,  50973,  51804,  53114,  55147,  56750,  58413,  59457,
        60943,  62364,  64413,  66538,  67738,  68859,  70226,  70738,
        70789,  75361,  76478,  78833,  80219,  81902,  82944,  83745,
        86247,  88196,  90228,  91303,  92920,  94212,  95237,  96431,
        98344,  99988, 101078, 104094, 106863, 106863, 108942, 111587,
       114552, 116506, 118882, 118882, 121824, 123664, 125689, 125689,
       129139, 130311, 131352, 132202, 133897, 137799, 139490, 142519,
       145016, 145983, 146786, 150203, 152490, 154337, 156426, 158812,
       160470, 161646, 163969, 167042, 169759, 171663, 173950, 175253,
       176467, 178128, 180057, 181638, 183457, 185980, 187002, 187002,
       189935, 192259, 194771, 196770, 198998, 200218, 201208, 202951,
      

## Corona Ampel

In [12]:
ampel = AT.ampel_aktuell

## ICU Beds

In [29]:
icu_occupancy = np.array(wien_cases.FZICU)
ndays = np.arange(len(icu_occupancy))
predict_days=100
future = np.arange(ndays[0],ndays[-1]+predict_days)
future_days = pd.date_range(start =wien_cases.iloc[0].MeldeDatum, end = wien_cases.iloc[-1].MeldeDatum + timedelta(days=predict_days-1)).to_list()

z = np.polyfit(ndays, icu_occupancy, 5)
p = np.poly1d(z)

f, ax = plt.subplots(figsize=(9,5))
ax.plot(future_days, p(future), marker='o', linestyle='--',markersize=2)
ax.plot(wien_cases.MeldeDatum, icu_occupancy, label='occupancy', marker='o', alpha=0.5,markersize=2)
ax.axhline(y=277,color='k')

plt.tight_layout()

<IPython.core.display.Javascript object>

In [18]:

# 


[Timestamp('2020-04-01 00:00:00', freq='D'),
 Timestamp('2020-04-02 00:00:00', freq='D'),
 Timestamp('2020-04-03 00:00:00', freq='D'),
 Timestamp('2020-04-04 00:00:00', freq='D'),
 Timestamp('2020-04-05 00:00:00', freq='D'),
 Timestamp('2020-04-06 00:00:00', freq='D'),
 Timestamp('2020-04-07 00:00:00', freq='D'),
 Timestamp('2020-04-08 00:00:00', freq='D'),
 Timestamp('2020-04-09 00:00:00', freq='D'),
 Timestamp('2020-04-10 00:00:00', freq='D'),
 Timestamp('2020-04-11 00:00:00', freq='D'),
 Timestamp('2020-04-12 00:00:00', freq='D'),
 Timestamp('2020-04-13 00:00:00', freq='D'),
 Timestamp('2020-04-14 00:00:00', freq='D'),
 Timestamp('2020-04-15 00:00:00', freq='D'),
 Timestamp('2020-04-16 00:00:00', freq='D'),
 Timestamp('2020-04-17 00:00:00', freq='D'),
 Timestamp('2020-04-18 00:00:00', freq='D'),
 Timestamp('2020-04-19 00:00:00', freq='D'),
 Timestamp('2020-04-20 00:00:00', freq='D'),
 Timestamp('2020-04-21 00:00:00', freq='D'),
 Timestamp('2020-04-22 00:00:00', freq='D'),
 Timestamp

## Gestorbene

In [39]:
from gestorbene import Gestorbene
G = Gestorbene()


### Overview

In [45]:
f, ax = plt.subplots(figsize=(9,5))
G.under_65.groupby(['week_end','sex']).sum()['F-ANZ-1'].rolling(12).mean().unstack().plot(ax=ax, alpha=0.7, marker='o', markersize=2)
plt.title('Under 65')
plt.tight_layout()


f, ax = plt.subplots(figsize=(9,5))
G.over_65.groupby(['week_end','sex']).sum()['F-ANZ-1'].rolling(12).mean().unstack().plot(ax=ax, alpha=0.7, marker='o', markersize=2)
plt.title('Over 65')
plt.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Monthly

In [67]:
f, ax = plt.subplots(figsize=(9,5))
G.under_65.groupby([G.under_65.week_end.dt.year, G.under_65.week_end.dt.month]).median()['F-ANZ-1'].plot(ax=ax, alpha=0.7, marker='o', markersize=2)
plt.title('Under 65')
# plt.xticks(np.arange(1,13), calendar.month_name[1:13], rotation=20)
plt.tight_layout()

f, ax = plt.subplots(figsize=(9,5))
G.over_65.groupby(G.over_65.week_end.dt.month).sum()['F-ANZ-1'].plot(ax=ax, alpha=0.7, marker='o', markersize=2)
plt.title('Over 65')
ax.xaxis.set_major_locator(plt.MaxNLocator(12))
plt.xticks(np.arange(1,13), calendar.month_name[1:13], rotation=20)
plt.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>