# Setup

In [1]:
# Notebook-wide plotting configuration.
LOG_PLOT = False  # presumably toggles a logarithmic y-axis in comparison plots — TODO confirm
NDAYS = 100       # presumably the look-back window in days for comparison plots — TODO confirm

In [2]:
%matplotlib notebook
from matplotlib import pyplot as plt
plt.style.use('seaborn-darkgrid')
import os
from helper import *
from datetime import datetime
from ecdc import ECDC
ecdc = ECDC()
df = ecdc.df
print(ecdc.df.dateRep.max())
print(df.columns)
EU = df[df.continentExp=='Europe']
AMERICA = df[(df.continentExp=='America')]


2020-12-02 00:00:00
Index(['dateRep', 'day', 'month', 'year', 'cases', 'deaths',
       'countriesAndTerritories', 'geoId', 'countryterritoryCode',
       'popData2019', 'continentExp',
       'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000'],
      dtype='object')


# Overview

In [3]:
# Per-country summary, limited to the ten countries with the most total deaths.
# Rows are ordered by report date before grouping; `latest` (from helper)
# presumably picks the most recent value of a column — TODO confirm.
(
    df.sort_values(by='dateRep')
      .groupby(by='countriesAndTerritories')
      .agg({
          'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': latest,
          'cases': latest,
          'deaths': 'sum',
      })
      .fillna(0)
      .sort_values(by='deaths', ascending=False)
      .head(10)
)

Unnamed: 0_level_0,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000,cases,deaths
countriesAndTerritories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
United_States_of_America,717.743788,180421,270645
Brazil,225.079404,50909,173817
India,42.922891,36604,138122
Mexico,87.171106,8819,106765
United_Kingdom,348.633261,13429,59051
Italy,634.247647,19347,56361
France,289.221999,8083,53506
Iran,226.111684,13881,48628
Spain,311.951792,8257,45511
Russia,243.639881,25345,41053


In [9]:
# World-wide and per-continent curves: cumulative deaths (left panel) and the
# 14-day rolling mean of daily cases (right panel).

# Use `ecdc.df` rather than the global `df`: other cells rebind `df` to a
# single-country frame, so `df` is not reliable when cells run out of order.
# NOTE(review): this is not the cell's last expression, so its result is
# computed but never displayed.
ecdc.overview(ecdc.df).head(5)

f, ax = plt.subplots(ncols=2, figsize=(9, 5), sharex=True)

# World totals per report date, drawn as a dashed black reference line.
deaths = ecdc.df.groupby('dateRep').agg({'deaths': 'sum', 'cases': 'sum'})
ax[0].plot(deaths.index, deaths.deaths.cumsum(), label='total', c='k', linestyle='--')
ax[1].plot(deaths.index, deaths.cases.rolling(14).mean(), label='total', c='k', linestyle='--')

continents = ecdc.df.continentExp.unique()
for continent in continents:
    # 'Other' is left out of the per-continent curves.
    if continent == 'Other':
        continue
    per_day = (
        ecdc.df[ecdc.df.continentExp == continent]
        .groupby('dateRep')
        .agg({'deaths': 'sum', 'cases': 'sum'})
    )
    label = continent.replace('_', ' ')
    ax[0].plot(per_day.index, per_day.deaths.cumsum(), label=label)
    ax[1].plot(per_day.index, per_day.cases.rolling(14).mean(), label=label)

# Final axis formatting via the helper, once every line has been drawn.
pretty_plot(ax[0], log=True, title='Deaths')
pretty_plot(ax[1], log=True, title='Cases')

<IPython.core.display.Javascript object>

# Austria, India, USA, Germany, Netherlands, Belgium

In [4]:
# Compare a hand-picked selection of countries (ECDC geoId codes).
# Wider alternative selection kept for reference:
# countries = ['AT', 'IN', 'US', 'UK', 'DE', 'NL', 'FR',  'IT', 'ES','EL' ]
countries = ['AT', 'IN', 'US', 'DE', 'NL', 'BE']
SELECTION = ecdc.df[ecdc.df.geoId.isin(countries)]

# LOG_PLOT / NDAYS come from the configuration cell (currently False / 100,
# the same values that were hard-coded here before).
ecdc.plot_comparison(countries, log=LOG_PLOT, ndays=NDAYS)
# NOTE(review): not the cell's last expression, so this result is not displayed.
ecdc.overview(SELECTION)

# Left: cumulative deaths. Right: 14-day rolling mean of daily cases.
f, ax = plt.subplots(ncols=2, sharex=True, figsize=(9, 5))
for geo_id in countries:
    # A dedicated name keeps the notebook-wide `df` (full data set) intact,
    # so earlier cells still work when re-run after this one.
    country_df = ecdc.select_country(geo_id)
    label = country_df.iloc[0].countriesAndTerritories.replace('_', ' ')

    ax[0].plot(country_df.dateRep, country_df.deaths.cumsum(), label=label)
    ax[1].plot(country_df.dateRep, country_df.cases.rolling(14).mean(), label=label)

# Format each axis once, after every line has been drawn.
pretty_plot(ax[0], log=True, title='Deaths')
pretty_plot(ax[1], log=True, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Austria and Neighbours

In [5]:
# Austria and the countries plotted alongside it (ECDC geoId codes).
countries = ['AT', 'DE', 'CZ', 'SK',  'SI', 'HU', 'IT', 'CH']
# NDAYS comes from the configuration cell (currently 100, the value that was
# hard-coded here before).
ecdc.plot_comparison(countries, log=True, ndays=NDAYS)

# Left: cumulative deaths. Right: 14-day rolling mean of daily cases.
f, ax = plt.subplots(ncols=2, sharex=True, figsize=(9, 5))
for geo_id in countries:
    # A dedicated name keeps the notebook-wide `df` (full data set) intact,
    # so earlier cells still work when re-run after this one.
    country_df = ecdc.select_country(geo_id)
    label = country_df.iloc[0].countriesAndTerritories.replace('_', ' ')

    ax[0].plot(country_df.dateRep, country_df.deaths.cumsum(), label=label)
    ax[1].plot(country_df.dateRep, country_df.cases.rolling(14).mean(), label=label)

# Format each axis once, after every line has been drawn.
pretty_plot(ax[0], log=True, title='Deaths')
pretty_plot(ax[1], log=True, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Nordic countries (Scandinavia, Finland, Iceland)

In [6]:
# Sweden, Norway, Denmark, Finland and Iceland (ECDC geoId codes).
countries = ('SE','NO','DK','FI', 'IS')
# NDAYS comes from the configuration cell (currently 100, the value that was
# hard-coded here before).
ecdc.plot_comparison(countries, log=True, ndays=NDAYS)

# Left: cumulative deaths. Right: 14-day rolling mean of daily cases.
f, ax = plt.subplots(ncols=2, sharex=True, figsize=(9, 5))
for geo_id in countries:
    # A dedicated name keeps the notebook-wide `df` (full data set) intact,
    # so earlier cells still work when re-run after this one.
    country_df = ecdc.select_country(geo_id)
    label = country_df.iloc[0].countriesAndTerritories.replace('_', ' ')

    ax[0].plot(country_df.dateRep, country_df.deaths.cumsum(), label=label)
    ax[1].plot(country_df.dateRep, country_df.cases.rolling(14).mean(), label=label)

# Format each axis once, after every line has been drawn.
pretty_plot(ax[0], log=True, title='Deaths')
pretty_plot(ax[1], log=True, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Europe

In [7]:
# Europe: compare Austria with the five European countries that rank highest
# on the newest report date, separately for 14-day incidence, cases and deaths.
incidence_col = 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000'


def _top5_geo_ids(frame, field):
    """Return the geoIds of the first five rows of `frame`, newest date first, then highest `field`."""
    newest_first = frame.sort_values(['dateRep', field], ascending=[False, False])
    return newest_first.head(5).geoId.to_list()


def _with_austria(geo_ids):
    """Return ['AT', *geo_ids] with duplicates dropped and order preserved.

    Without the de-duplication Austria is passed to the plot twice whenever it
    is already among the top five.
    """
    return list(dict.fromkeys(['AT', *geo_ids]))


# Only countries with more than one million inhabitants.
EU_med = EU[EU.popData2019>1000000]
# NOTE(review): not the cell's last expression, so this result is not displayed.
ecdc.overview(EU_med).sort_values(by=(incidence_col, 'latest'), ascending=False).head(5)

# Ten highest-incidence rows on the newest date; the first five of them seed
# the incidence comparison (the same rows the original repeated sort selected).
EU_most = EU_med.sort_values(['dateRep', incidence_col], ascending=[False,False]).head(10)
geoIds = sorted(set(['AT', *EU_most.head(5).geoId.to_list()]))
ecdc.plot_comparison(geoIds, log=False, ndays=45, field=incidence_col)

geoIds = _top5_geo_ids(EU_med, 'cases')
ecdc.plot_comparison(_with_austria(geoIds), log=False, ndays=45, field='cases', roll_days=14)

geoIds = _top5_geo_ids(EU_med, 'deaths')
ecdc.plot_comparison(_with_austria(geoIds), log=False, ndays=45, field='deaths', roll_days=14);



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>