# Setup

In [38]:
%matplotlib notebook
LOG_PLOT = True

## Imports

In [66]:
from matplotlib import pyplot as plt
plt.style.use('seaborn-darkgrid')
import pandas as pd
from datetime import datetime

import requests
from json import loads, dumps

import os

## ECDC Data

In [40]:

# WORLD
def fetch_world():
    response = requests.get('https://opendata.ecdc.europa.eu/covid19/casedistribution/json')    
    json = response.json()
    df = pd.read_json(dumps(json['records']))   
    df['dateRep']=pd.to_datetime(df['dateRep'].astype(str), format='%d/%m/%Y')

    return df



def plot_country(geoId, roll_days=7, log=False):
    selection = df[df.geoId==geoId].sort_values(by=['year','month','day'], ascending=True)

    f,ax = plt.subplots()
    ax.plot(selection.dateRep, selection['cases'].rolling(roll_days).mean(), label=f'{roll_days} day average', marker='o', markersize=4, linestyle='--')
#     ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    plt.xticks(rotation=45)
    ax.bar(selection.dateRep, selection['cases'],label='recorded',  alpha=0.6, color='C1')
    plt.legend(loc='best')
    if log:
        plt.yscale('log')
        
    plt.title(selection.iloc[0].countriesAndTerritories.replace('_', ' '))
    plt.tight_layout()

    return f



df = fetch_world()


## Austria - from Gesundheitsminesterium

In [41]:
# AUSTRIA
WEEKDAYS = ('Monday', 'Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday')

def fetch_Austria():
    df = pd.read_csv('https://info.gesundheitsministerium.at/data/Epikurve.csv', delimiter=';')
    weekday= []
    print(df.columns)
    for i, row in df.iterrows():
        day_num = datetime.strptime(row['time'], '%d.%m.%Y').weekday()
        weekday.append(day_num)
        
    df['weekday'] = weekday
    df['time']=pd.to_datetime(df['time'].astype(str), format='%d.%m.%Y')
#     .dt.strftime('%d/%m/%Y')

    return df

def plot_Austria(roll_days=7, log=False):
    df = fetch_Austria()
    f,ax = plt.subplots()
    ax.plot(df.time, df['tägliche Erkrankungen'].rolling(roll_days).mean(), label=f'{roll_days} day average', marker='o', markersize=4, linestyle='--')
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))
    plt.xticks(rotation=45)
    ax.bar(df.time, df['tägliche Erkrankungen'],label='recorded',  alpha=0.6, color='C1')
    plt.legend(loc='best')
    plt.title('Positive COVID tests')
    if log:
        plt.yscale('log')
    plt.tight_layout()
    return f

df_at  = fetch_Austria()


Index(['time', 'tägliche Erkrankungen', 'Timestamp'], dtype='object')


## Aggregation Functions

In [42]:
import numpy as np
def rolling_avg(x):
    return np.round(x.iloc[-7:].mean())

def latest(x):
    return x.iloc[-1]

def last_7_days_sum(x):
    return x.iloc[-7:].sum()
    

## DF Subsets

In [43]:
SELECTION =  df[df.geoId.isin(['AT', 'IN', 'US', 'UK', 'DE', 'IT', 'NL'])]
EU = df[df.continentExp=='Europe']


# Overview

## Selected Countries

In [44]:
SELECTION.sort_values(
    by=['year','month','day'], ascending=True).groupby("countriesAndTerritories").agg(
    {'cases': ['sum', last_7_days_sum, latest, rolling_avg, 'max' ],
    'deaths': ['sum',  last_7_days_sum, latest, rolling_avg, 'max' ],} ).sort_values(by= ('cases','last_7_days_sum'),ascending=False)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,sum,last_7_days_sum,latest,rolling_avg,max,sum,last_7_days_sum,latest,rolling_avg,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
India,5732518,614265,86508,87752,97894,91149,7951,1129,1136,2003
United_States_of_America,6934204,303313,37930,43330,78427,201909,5107,1102,730,4928
United_Kingdom,409729,31510,6178,4501,6178,41862,178,37,25,1224
Netherlands,100493,14256,2351,2037,2351,6287,36,5,5,234
Germany,278070,12213,2143,1745,6294,9428,57,19,8,315
Italy,302537,11095,1640,1585,6557,35758,113,20,16,971
Austria,40558,5110,661,730,1141,777,19,6,3,31


## Most cases in the last 7 days

### World

In [47]:
df.sort_values(
    by=['year','month','day'], ascending=True).groupby("countriesAndTerritories").agg(
    {'cases': ['sum', last_7_days_sum, latest, rolling_avg, 'max' ],
    'deaths': ['sum',  last_7_days_sum, latest, rolling_avg, 'max' ],} ).sort_values(by= ('cases','last_7_days_sum'),ascending=False).head(10)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,sum,last_7_days_sum,latest,rolling_avg,max,sum,last_7_days_sum,latest,rolling_avg,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
India,5732518,614265,86508,87752,97894,91149,7951,1129,1136,2003
United_States_of_America,6934204,303313,37930,43330,78427,201909,5107,1102,730,4928
Brazil,4624885,205802,33281,29400,69074,138977,4871,869,696,1595
Argentina,664799,87474,12638,12496,17213,14376,2466,424,352,573
Spain,693556,79196,11289,11314,31428,31034,791,130,113,1179
France,481141,76253,13072,10893,13498,31459,414,43,59,2004
Colombia,784268,47891,6731,6842,13056,24746,1268,176,181,400
Russia,1122241,42722,6431,6103,12640,19799,882,150,126,312
Peru,782695,38295,6149,5471,10143,31870,819,284,117,3935
Israel,204690,32922,11316,4703,11316,1325,160,40,23,71


### Europe

In [24]:
EU.sort_values(
    by=['year','month','day'], ascending=True).groupby("countriesAndTerritories").agg(
    {'cases': ['sum', last_7_days_sum, latest, rolling_avg, 'max' ],
    'deaths': ['sum',  last_7_days_sum, latest, rolling_avg, 'max' ],} ).sort_values(by= ('cases','last_7_days_sum'),ascending=False).head(10)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,sum,last_7_days_sum,latest,rolling_avg,max,sum,last_7_days_sum,latest,rolling_avg,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Spain,693556,79196,11289,11314,31428,31034,791,130,113,1179
France,481141,76253,13072,10893,13498,31459,414,43,59,2004
Russia,1122241,42722,6431,6103,12640,19799,882,150,126,312
United_Kingdom,409729,31510,6178,4501,6178,41862,178,37,25,1224
Ukraine,184734,22074,3497,3153,5863,3705,365,63,52,129
Czechia,55464,14432,2306,2062,3123,555,73,24,10,24
Netherlands,100493,14256,2351,2037,2351,6287,36,5,5,234
Germany,278070,12213,2143,1745,6294,9428,57,19,8,315
Italy,302537,11095,1640,1585,6557,35758,113,20,16,971
Romania,116415,9404,1767,1343,1767,4550,265,47,38,60


## Lowset number of new cases in the last 7 days

## Europe*

\*among countries with population of at least 1 million

In [37]:
selection = EU[df.popData2019>1*10**6]
selection.sort_values(
    by=['year','month','day'], ascending=True).groupby("countriesAndTerritories").agg(
    {'cases': ['sum', last_7_days_sum, latest, rolling_avg, 'max' ],
    'deaths': ['sum',  last_7_days_sum, latest, rolling_avg, 'max' ],} ).sort_values(by= ('cases','last_7_days_sum'),ascending=False).tail(10)

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,cases,cases,cases,cases,cases,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,sum,last_7_days_sum,latest,rolling_avg,max,sum,last_7_days_sum,latest,rolling_avg,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Bulgaria,19283,893,160,128,330,779,35,12,5,18
Azerbaijan,39524,866,146,124,590,580,11,2,2,13
Norway,13152,759,152,108,425,267,2,0,0,13
Slovenia,4694,734,136,105,137,133,2,0,0,6
Finland,9288,538,93,77,267,343,4,2,1,43
Lithuania,3932,490,73,70,122,87,0,0,0,6
Serbia,33080,467,81,67,467,744,8,1,1,18
Kosovo,15270,431,62,62,496,616,12,1,2,40
Estonia,3033,277,57,40,134,64,0,0,0,6
Latvia,1572,86,12,12,71,36,1,0,0,4


# Austria
Compare ECDC data with Gesundheitsminesterium Data

### Total number of cases

In [19]:
df_at['tägliche Erkrankungen'].sum(), df[df.geoId=='AT'].cases.sum()

(40853, 40558)

## Discrepancy between Gesundheitsministerium data and ECDC data

In [21]:
f,ax = plt.subplots()
# ax.bar(df_at.time, df_at['tägliche Erkrankungen'],label='recorded',  alpha=0.6, color='C1')

ax.plot(df_at.time, df_at['tägliche Erkrankungen'].rolling(7).mean(),  marker='o', markersize=4, linestyle='--', color='C1')

ax.bar(df_at.time, df_at['tägliche Erkrankungen'],label='AT',  alpha=0.6, color='C1')
selection = df[df.geoId=='AT'].sort_values(by=['year','month','day'], ascending=True)
ax.bar(selection.dateRep, selection['cases'],label='ECDH',  alpha=0.6, color='C0')
ax.plot(selection.dateRep, selection['cases'].rolling(7).mean(), marker='o', markersize=4, linestyle='--',color='C0')
# plt.yscale('log')

ax.xaxis.set_major_locator(plt.MaxNLocator(20));

plt.xticks(rotation=45);
plt.legend(loc='best')
plt.tight_layout()



<IPython.core.display.Javascript object>

### Weekday bias

In [20]:
f, ax = plt.subplots(nrows=2, sharex=True)
df_at.groupby(['weekday']).sum()['tägliche Erkrankungen'].plot(ax=ax[0], marker='o')
ax[0].set_ylabel('Reported positive tests (Total)')
df_at.groupby(['weekday']).median()['tägliche Erkrankungen'].plot(ax=ax[1], marker='o')
ax[1].set_ylabel('Reported positive tests (Median)')
ax[1].set_xlabel('Day of the week')
ax[1].set_xticks([0,1,2,3,4,5,6])
ax[1].set_xticklabels(WEEKDAYS, rotation=40)
plt.tight_layout()

<IPython.core.display.Javascript object>

# India

In [31]:
plot_country('IN', log=LOG_PLOT);

<IPython.core.display.Javascript object>

# USA

In [32]:
plot_country('US', log=LOG_PLOT);


<IPython.core.display.Javascript object>

# Germany

In [22]:
plot_country('DE', log=LOG_PLOT);

<IPython.core.display.Javascript object>

## UK

In [23]:
plot_country('UK', log=LOG_PLOT);

<IPython.core.display.Javascript object>

# China

In [37]:
plot_country('CN', log=LOG_PLOT);


<IPython.core.display.Javascript object>

# Commit and Push

In [78]:
%cd Documents/code/covid19/
now = datetime.now()
commit_message = "Last run on " + now.strftime('%Y-%m-%d %H:%M')
commit_message
! echo $commit_message > commit_message.txt
! git add .
! git commit -F commit_message.txt
! git push


[Errno 2] No such file or directory: 'Documents/code/covid19/'
/home/gythaogg/Documents/code/covid19
[develop 65646bd] Last run on 2020-09-24 13:09
 3 files changed, 179 insertions(+)
 create mode 100644 .gitignore
Enumerating objects: 10, done.
Counting objects: 100% (10/10), done.
Delta compression using up to 16 threads
Compressing objects: 100% (5/5), done.
Writing objects: 100% (6/6), 1023 bytes | 1023.00 KiB/s, done.
Total 6 (delta 1), reused 0 (delta 0)
remote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
To github.com:gythaogg/covid19.git
   eb66a05..65646bd  develop -> develop
