# Setup

In [52]:
%matplotlib notebook
LOG_PLOT = False

## Imports

In [53]:
from matplotlib import pyplot as plt
plt.style.use('seaborn-darkgrid')
import pandas as pd
from datetime import datetime

import requests
from json import loads, dumps

import os

## ECDC Data

In [84]:

# WORLD
def fetch_world():
    response = requests.get('https://opendata.ecdc.europa.eu/covid19/casedistribution/json')    
    json = response.json()
    df = pd.read_json(dumps(json['records']))   
    df['dateRep']=pd.to_datetime(df['dateRep'].astype(str), format='%d/%m/%Y')

    return df



def plot_selection(selection, roll_days=7, log=False):
    

    f,ax = plt.subplots()
    ax.plot(selection.dateRep, selection['cases'].rolling(roll_days).mean(), label=f'{roll_days} day average', marker='o', markersize=4, linestyle='--')
#     ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    plt.xticks(rotation=45)
    ax.bar(selection.dateRep, selection['cases'],label='recorded',  alpha=0.6, color='C1')
    plt.legend(loc='best')
    if log:
        plt.yscale('log')
        
    plt.title(selection.iloc[0].countriesAndTerritories.replace('_', ' '))
    plt.tight_layout()

    return f


def select_country(geoId):
    selection = df[df.geoId==geoId].sort_values(by=['year','month','day'], ascending=True)
    return selection

def plot_country(geoId, **kwargs):
    return plot_selection(select_country(geoId), **kwargs)
    
df = fetch_world()


## Austria - from Gesundheitsminesterium

In [72]:
# AUSTRIA
WEEKDAYS = ('Monday', 'Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday')

def fetch_Austria():
    df = pd.read_csv('https://info.gesundheitsministerium.at/data/Epikurve.csv', delimiter=';')
    weekday= []
    print(df.columns)
    for i, row in df.iterrows():
        day_num = datetime.strptime(row['time'], '%d.%m.%Y').weekday()
        weekday.append(day_num)
        
    df['weekday'] = weekday
    df['time']=pd.to_datetime(df['time'].astype(str), format='%d.%m.%Y')
#     .dt.strftime('%d/%m/%Y')

    return df

def plot_Austria(roll_days=7, log=False):
    df = fetch_Austria()
    f,ax = plt.subplots()
    ax.plot(df.time, df['tägliche Erkrankungen'].rolling(roll_days).mean(), label=f'{roll_days} day average', marker='o', markersize=4, linestyle='--')
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    plt.xticks(rotation=45)
    ax.bar(df.time, df['tägliche Erkrankungen'],label='recorded',  alpha=0.6, color='C1')
    plt.legend(loc='best')
    plt.title('Positive COVID tests')
    if log:
        plt.yscale('log')
    plt.tight_layout()
    return f

df_at  = fetch_Austria()


Index(['time', 'tägliche Erkrankungen', 'Timestamp'], dtype='object')


## Aggregation Functions

In [73]:
import numpy as np
def rolling_avg(x):
    return np.round(x.iloc[-7:].mean())

def latest(x):
    return x.iloc[-1]

def last_7_days_sum(x):
    return x.iloc[-7:].sum()
    
    
def overview(selection):
    '''
    Returns 
    - sum,     
    - last_7_days_sum: sum in the last 7 days, 
    - rolling_avg:  rolling average for the last 7 days, 
    - latest, and 
    - max 
    values for cases and deaths
    '''
    return selection.sort_values(
        by=['year','month','day'], ascending=True).groupby("countriesAndTerritories").agg(
        {'cases': ['sum', last_7_days_sum,  rolling_avg, latest,  'max' ],
        'deaths': ['sum',  last_7_days_sum,  rolling_avg, latest,  'max' ],
        'dateRep': ['min', 'max']} ).sort_values(by= ('cases','last_7_days_sum'),ascending=False)
    

## DF Subsets

In [74]:
SELECTION =  df[df.geoId.isin(['AT', 'IN', 'US', 'UK', 'DE', 'IT', 'NL'])]
EU = df[df.continentExp=='Europe']
AMERICA = df[(df.continentExp=='America')]


# Overview

## Selected Countries

In [75]:
overview(SELECTION)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
India,6225763,579753,82822,80472,97894,97497,7477,1068,1179,2003,2019-12-31,2020-09-30
United_States_of_America,7191061,294787,42112,43017,78427,205998,5191,742,928,4928,2019-12-31,2020-09-30
United_Kingdom,446156,42605,6086,7143,7143,42072,247,35,71,1224,2019-12-31,2020-09-30
Netherlands,117420,19278,2754,3001,3001,6384,102,15,13,234,2019-12-31,2020-09-30
Germany,289219,13292,1899,1798,6294,9488,79,11,17,315,2019-12-31,2020-09-30
Italy,313011,12114,1731,1647,6557,35875,137,20,24,971,2019-12-31,2020-09-30
Austria,44607,4710,673,722,1141,796,25,4,6,31,2019-12-31,2020-09-30


## Most cases in the last 7 days

### Europe

In [76]:
overview(EU).head(5)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
France,550690,82621,11803,8051,16096,31893,477,68,85,2004,2019-12-31,2020-09-30
Spain,758172,75905,10844,9906,31785,31614,710,101,203,1179,2019-12-31,2020-09-29
Russia,1167805,51995,7428,8232,11656,20545,896,128,160,312,2019-12-31,2020-09-30
United_Kingdom,446156,42605,6086,7143,7143,42072,247,35,71,1224,2019-12-31,2020-09-30
Ukraine,204932,23695,3385,3627,5863,4065,423,60,69,129,2020-03-04,2020-09-30


### America

In [77]:
overview(AMERICA).head(5)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
United_States_of_America,7191061,294787,42112,43017,78427,205998,5191,742,928,4928,2019-12-31,2020-09-30
Brazil,4777522,185918,26560,32058,69074,142921,4813,688,863,1595,2019-12-31,2020-09-30
Argentina,736596,84435,12062,13477,17213,16519,2567,367,406,573,2020-03-04,2020-09-30
Colombia,824042,46505,6644,5839,13056,25828,1258,180,187,400,2020-03-07,2020-09-30
Peru,811768,35222,5032,3054,10143,32396,810,116,72,3935,2020-03-07,2020-09-30


### World

In [78]:
overview(df).head(10)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
India,6225763,579753,82822,80472,97894,97497,7477,1068,1179,2003,2019-12-31,2020-09-30
United_States_of_America,7191061,294787,42112,43017,78427,205998,5191,742,928,4928,2019-12-31,2020-09-30
Brazil,4777522,185918,26560,32058,69074,142921,4813,688,863,1595,2019-12-31,2020-09-30
Argentina,736596,84435,12062,13477,17213,16519,2567,367,406,573,2020-03-04,2020-09-30
France,550690,82621,11803,8051,16096,31893,477,68,85,2004,2019-12-31,2020-09-30
Spain,758172,75905,10844,9906,31785,31614,710,101,203,1179,2019-12-31,2020-09-29
Russia,1167805,51995,7428,8232,11656,20545,896,128,160,312,2019-12-31,2020-09-30
Colombia,824042,46505,6644,5839,13056,25828,1258,180,187,400,2020-03-07,2020-09-30
Israel,236926,43552,6222,3661,11827,1528,243,35,21,71,2019-12-31,2020-09-30
United_Kingdom,446156,42605,6086,7143,7143,42072,247,35,71,1224,2019-12-31,2020-09-30


## Lowset number of new cases in the last 7 days

### Europe*

\*among countries with population of at least 1 million

In [79]:
overview(EU[EU.popData2019>1*10**6]).tail(5)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Finland,9892,697,100,158,267,345,4,1,0,43,2019-12-31,2020-09-30
Serbia,33479,480,69,65,467,749,6,1,1,18,2020-03-07,2020-09-30
Kosovo,15574,366,52,54,496,625,10,1,0,40,2020-03-14,2020-09-30
Estonia,3315,339,48,48,134,64,0,0,0,6,2019-12-31,2020-09-30
Latvia,1729,169,24,32,71,37,1,0,1,4,2020-03-03,2020-09-30


### America*

\*among countries with population of at least 10 million

In [80]:
overview(AMERICA[AMERICA.popData2019>1*10**7]).tail(5)


Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Guatemala,90968,4345,621,705,4233,3238,101,14,4,74,2020-03-15,2020-09-30
Bolivia,134641,3188,455,333,2687,7931,238,34,31,1610,2020-03-12,2020-09-30
Dominican_Republic,111900,2631,376,234,2147,2101,37,5,3,39,2019-12-31,2020-09-30
Cuba,5531,309,44,48,93,122,5,1,0,6,2020-03-12,2020-09-30
Haiti,8740,107,15,0,284,227,4,1,0,12,2020-03-20,2020-09-30


### World*
\*among countries with population of at least 10 million

In [81]:
overview( df[df.popData2019>1*10**7]).tail(5)


Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Taiwan,514,5,1,1,31,7,0,0,0,3,2019-12-31,2020-09-30
Niger,1196,3,0,0,69,69,0,0,0,5,2020-03-21,2020-09-30
Yemen,2035,3,0,0,116,588,1,0,0,52,2020-04-10,2020-09-30
Cambodia,277,2,0,0,35,0,0,0,0,0,2019-12-31,2020-09-30
United_Republic_of_Tanzania,509,0,0,0,174,21,0,0,0,6,2020-03-17,2020-09-30


# Austria
Compare ECDC data with Gesundheitsminesterium Data

## Plots

In [87]:
plot_Austria();
plot_country('AT', log=LOG_PLOT)

Index(['time', 'tägliche Erkrankungen', 'Timestamp'], dtype='object')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Last 7 days

In [69]:
df_at.agg(
        {'tägliche Erkrankungen': ['sum', last_7_days_sum,  rolling_avg, latest,  'max' ],
        'time': ['min', 'max']} )
    

Unnamed: 0,tägliche Erkrankungen,time
last_7_days_sum,4566.0,NaT
latest,645.0,NaT
max,1065.0,2020-09-29
min,,2020-02-25
rolling_avg,652.0,NaT
sum,44932.0,NaT


In [70]:
overview(df[df.geoId=='AT'])

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths,dateRep,dateRep
Unnamed: 0_level_1,sum,last_7_days_sum,rolling_avg,latest,max,sum,last_7_days_sum,rolling_avg,latest,max,min,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Austria,44607,4710,673,722,1141,796,25,4,6,31,2019-12-31,2020-09-30


### Weekday bias

In [61]:
f, ax = plt.subplots(nrows=2, sharex=True)
df_at.groupby(['weekday']).sum()['tägliche Erkrankungen'].plot(ax=ax[0], marker='o')
ax[0].set_ylabel('Reported positive tests (Total)')
df_at.groupby(['weekday']).median()['tägliche Erkrankungen'].plot(ax=ax[1], marker='o')
ax[1].set_ylabel('Reported positive tests (Median)')
ax[1].set_xlabel('Day of the week')
ax[1].set_xticks([0,1,2,3,4,5,6])
ax[1].set_xticklabels(WEEKDAYS, rotation=40)
plt.tight_layout()

<IPython.core.display.Javascript object>

# India

In [88]:
plot_country('IN', log=LOG_PLOT);

<IPython.core.display.Javascript object>

# USA

In [89]:
plot_country('US', log=LOG_PLOT);


<IPython.core.display.Javascript object>

# Germany

In [90]:
plot_country('DE', log=LOG_PLOT);

<IPython.core.display.Javascript object>

## UK

In [91]:
plot_country('UK', log=LOG_PLOT);

<IPython.core.display.Javascript object>

# China

In [85]:
plot_country('CN', log=LOG_PLOT);


<IPython.core.display.Javascript object>

# Commit and Push

In [67]:
%cd Documents/code/covid19/
now = datetime.now()
commit_message = "Last run on " + now.strftime('%Y-%m-%d %H:%M')
commit_message
! echo $commit_message > commit_message.txt
! git add .
! git commit -F commit_message.txt
! git push


[Errno 2] No such file or directory: 'Documents/code/covid19/'
/home/gythaogg/Documents/code/covid19/notebooks
[develop 5fcf30e] Last run on 2020-09-28 10:26
 3 files changed, 2522 insertions(+), 195 deletions(-)
 create mode 100644 notebooks/.ipynb_checkpoints/Austria_ECDC Data Differences-checkpoint.ipynb
 create mode 100644 notebooks/Austria_ECDC Data Differences.ipynb
Enumerating objects: 10, done.
Counting objects: 100% (10/10), done.
Delta compression using up to 16 threads
Compressing objects: 100% (6/6), done.
Writing objects: 100% (6/6), 476.99 KiB | 5.55 MiB/s, done.
Total 6 (delta 1), reused 0 (delta 0)
remote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
To github.com:gythaogg/covid19.git
   e8ef7d5..5fcf30e  develop -> develop
