# Setup

In [1]:
LOG_PLOT = False
NDAYS=100
from matplotlib import cm


In [2]:
%matplotlib notebook
from matplotlib import pyplot as plt
plt.style.use('seaborn-darkgrid')
import os
from helper import *
from datetime import datetime
from ecdc import ECDC
ecdc = ECDC()
df = ecdc.df
print(ecdc.df.dateRep.max())
print(df.columns)
EU = df[df.continentExp=='Europe']
AMERICA = df[(df.continentExp=='America')]


2021-02-08 00:00:00
Index(['dateRep', 'year_week', 'cases_weekly', 'deaths_weekly',
       'countriesAndTerritories', 'geoId', 'countryterritoryCode',
       'popData2019', 'continentExp',
       'notification_rate_per_100000_population_14-days'],
      dtype='object')


# Overview

In [3]:
ecdc.overview(df).head(5)
f, ax = plt.subplots(ncols=2, figsize=(9,5), sharex=True)
deaths =ecdc.df.groupby('dateRep').agg({'deaths_weekly':'sum', 'cases_weekly': 'sum'})
ax[0].plot(deaths.index, deaths.deaths_weekly.cumsum(), label='total', c='k', linestyle='--')
ax[1].plot(deaths.index, deaths.cases_weekly.rolling(14).mean(), label='total', c='k', linestyle='--')
continents = ecdc.df.continentExp.unique()

for c in continents:
    if c=='Other': continue
    df_deaths =ecdc.df[ecdc.df.continentExp==c].groupby('dateRep').agg({'deaths_weekly':'sum', 
                                                                        'cases_weekly':'sum'})
    ax[0].plot(df_deaths.index, df_deaths.deaths_weekly.cumsum(), label=c.replace('_',' '))
    ax[1].plot(df_deaths.index, df_deaths.cases_weekly, label=c.replace('_',' '))
    
pretty_plot(ax[0], log=True,title='Deaths')
pretty_plot(ax[1], log=True,title='Cases')

<IPython.core.display.Javascript object>

## Most deaths (rolling average of absolute numbers)

In [4]:
df.sort_values(by='dateRep').groupby(by='countriesAndTerritories').agg({'cases_weekly':{rolling_avg, latest}, 'deaths_weekly':{'sum', rolling_avg, latest}}).fillna(0).sort_values(by=('deaths_weekly', 'sum'),ascending=False).head(5)

Unnamed: 0_level_0,cases_weekly,cases_weekly,deaths_weekly,deaths_weekly,deaths_weekly
Unnamed: 0_level_1,rolling_avg,latest,rolling_avg,sum,latest
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
United_States_of_America,1309104.0,687440,20830.285714,463482,20127
Brazil,326351.4,318757,6411.285714,232170,7071
Mexico,87156.86,66305,6876.142857,166731,7631
India,111804.9,80584,1324.285714,155080,688
United_Kingdom,274233.9,142608,6485.285714,112798,6640


## Most cases (cumulative - absolute numbers)

In [5]:
df.sort_values(by='dateRep').groupby(by='countriesAndTerritories').agg({'cases_weekly':{rolling_avg, latest}, 'deaths_weekly':{'sum', rolling_avg, latest}}).fillna(0).sort_values(by=('deaths_weekly', 'sum'),ascending=False).head(5)

Unnamed: 0_level_0,cases_weekly,cases_weekly,deaths_weekly,deaths_weekly,deaths_weekly
Unnamed: 0_level_1,rolling_avg,latest,rolling_avg,sum,latest
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
United_States_of_America,1309104.0,687440,20830.285714,463482,20127
Brazil,326351.4,318757,6411.285714,232170,7071
Mexico,87156.86,66305,6876.142857,166731,7631
India,111804.9,80584,1324.285714,155080,688
United_Kingdom,274233.9,142608,6485.285714,112798,6640


# Austria, USA, India

In [6]:
countries = ['AT', 'IN', 'US', 'DE', 'UK', 'NL', 'SE']
SELECTION =  ecdc.df[ecdc.df.geoId.isin(countries)]

ecdc.plot_comparison(countries, log=False, ndays=100)
ecdc.overview(SELECTION)
f,ax = plt.subplots(ncols=2, sharex=True, figsize=(9,5))
for c in countries:
    df = ecdc.select_country(c)
    popData2019 = df.iloc[0].popData2019
    label = df.iloc[0].countriesAndTerritories.replace('_',' ')

    ax[0].plot(df.dateRep, df.deaths_weekly.cumsum(), label=label)
    pretty_plot(ax[0],log=False, title='Deaths')
    ax[1].plot(df.dateRep, df.cases_weekly.rolling(14).mean(), label=label)
    pretty_plot(ax[1],log=False, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Austria and Neighbours

In [13]:
countries = ['AT', 'DE', 'CZ', 'SK',  'SI', 'HU', 'IT', 'CH', 'LI']
ecdc.plot_comparison(countries, log=False, ndays=300)
f,ax = plt.subplots(ncols=2, sharex=True, figsize=(9,5))
for c in countries:
    df = ecdc.select_country(c)
    popData2019 = df.iloc[0].popData2019
    label = df.iloc[0].countriesAndTerritories.replace('_',' ')

    ax[0].plot(df.dateRep, df.deaths_weekly.cumsum(), label=label)
    pretty_plot(ax[0],log=False, title='Deaths')
    ax[1].plot(df.dateRep, df.cases_weekly, label=label)
    pretty_plot(ax[1],log=False, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Austria and Scandinavia

In [7]:
countries = ('AT','SE','NO','DK','FI', 'IS')
ecdc.plot_comparison(countries, log=False, ndays=300)
f,ax = plt.subplots(ncols=2, sharex=True, figsize=(9,5))
for c in countries:
    df = ecdc.select_country(c)
    popData2019 = df.iloc[0].popData2019
    label = df.iloc[0].countriesAndTerritories.replace('_',' ')

    ax[0].plot(df.dateRep, df.deaths_weekly.cumsum(), label=label)
    pretty_plot(ax[0],log=False, title='Deaths')
    ax[1].plot(df.dateRep, df.cases_weekly, label=label)
    pretty_plot(ax[1],log=False, title='Cases')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Europe

In [8]:
EU_med = EU[EU.popData2019>5000000]
ecdc.overview(EU_med).sort_values(by=('notification_rate_per_100000_population_14-days', 'latest'), ascending=False).head(5)


Unnamed: 0_level_0,dateRep,cases_weekly,cases_weekly,cases_weekly,deaths_weekly,deaths_weekly,deaths_weekly,deaths_weekly,deaths_weekly,notification_rate_per_100000_population_14-days,notification_rate_per_100000_population_14-days
Unnamed: 0_level_1,latest,rolling_avg,last_5_days,max,sum,last_7_days_sum,rolling_avg,last_5_days,max,latest,max
countriesAndTerritories,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Portugal,2021-02-08,55899.0,"57670, 67210, 86610, 83208, 39093",86610,14158,8024,1146.285714,"729, 1103, 1693, 2036, 1401",2036,1190.09,1652.47
Czechia,2021-02-08,58554.571429,"88740, 56398, 48152, 47325, 50076",88740,17333,6922,988.857143,"1202, 1177, 1004, 939, 941",1429,914.58,1575.23
Spain,2021-02-08,167119.428571,"152938, 224669, 256931, 229423, 166280",256931,62295,13035,1862.142857,"1197, 1494, 2439, 2873, 3214",5890,843.05,1036.18
Slovakia,2021-02-08,16106.714286,"20970, 15316, 12642, 12886, 14170",20970,5271,3716,530.857143,"486, 519, 542, 574, 629",642,496.4,751.81
United_Kingdom,2021-02-08,274233.857143,"417570, 323610, 251504, 169713, 142608",417570,112798,45397,6485.285714,"6407, 7830, 8678, 8219, 6640",8678,468.62,1114.26


In [9]:
EU_med.sort_values(['dateRep', 'cases_weekly'], ascending=[False,False]).head(10)

Unnamed: 0,dateRep,year_week,cases_weekly,deaths_weekly,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,notification_rate_per_100000_population_14-days
9057,2021-02-08,2021-05,166280,3214,Spain,ES,ESP,46937060.0,Europe,843.05
10026,2021-02-08,2021-05,142608,6640,United_Kingdom,UK,GBR,66647112.0,Europe,468.62
3494,2021-02-08,2021-05,139934,2908,France,FR,FRA,67012883.0,Europe,422.95
8053,2021-02-08,2021-05,115110,3449,Russia,RU,RUS,145872260.0,Europe,167.62
4985,2021-02-08,2021-05,83706,2757,Italy,IT,ITA,60359546.0,Europe,281.52
3752,2021-02-08,2021-05,66574,4555,Germany,DE,DEU,83019213.0,Europe,176.92
9779,2021-02-08,2021-05,53993,804,Turkey,TR,TUR,82003882.0,Europe,124.2
2567,2021-02-08,2021-05,50076,941,Czechia,CZ,CZE,10649800.0,Europe,914.58
7793,2021-02-08,2021-05,39301,1952,Poland,PL,POL,37972812.0,Europe,196.37
7842,2021-02-08,2021-05,39093,1401,Portugal,PT,PRT,10276617.0,Europe,1190.09


In [10]:
print(3851*100000/83019213.0,3851/7 )
EU_med.sort_values(['dateRep', 'deaths_weekly'], ascending=[False,False]).head(10)


4.638685264337545 550.1428571428571


Unnamed: 0,dateRep,year_week,cases_weekly,deaths_weekly,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,notification_rate_per_100000_population_14-days
10026,2021-02-08,2021-05,142608,6640,United_Kingdom,UK,GBR,66647112.0,Europe,468.62
3752,2021-02-08,2021-05,66574,4555,Germany,DE,DEU,83019213.0,Europe,176.92
8053,2021-02-08,2021-05,115110,3449,Russia,RU,RUS,145872260.0,Europe,167.62
9057,2021-02-08,2021-05,166280,3214,Spain,ES,ESP,46937060.0,Europe,843.05
3494,2021-02-08,2021-05,139934,2908,France,FR,FRA,67012883.0,Europe,422.95
4985,2021-02-08,2021-05,83706,2757,Italy,IT,ITA,60359546.0,Europe,281.52
7793,2021-02-08,2021-05,39301,1952,Poland,PL,POL,37972812.0,Europe,196.37
7842,2021-02-08,2021-05,39093,1401,Portugal,PT,PRT,10276617.0,Europe,1190.09
2567,2021-02-08,2021-05,50076,941,Czechia,CZ,CZE,10649800.0,Europe,914.58
9779,2021-02-08,2021-05,53993,804,Turkey,TR,TUR,82003882.0,Europe,124.2


In [11]:

EU_most = EU_med.sort_values(['dateRep', 'notification_rate_per_100000_population_14-days'], ascending=[False,False]).head(10)
geoIds = EU_med.sort_values(['dateRep', 'notification_rate_per_100000_population_14-days'], ascending=[False,False]).head(5).geoId.to_list()
geoIds = sorted(set(['AT', *geoIds ]))
ecdc.plot_comparison(geoIds, log=False, ndays=45, field='notification_rate_per_100000_population_14-days');

geoIds = EU_med.sort_values(['dateRep', 'cases_weekly'], ascending=[False,False]).head(5).geoId.to_list()
ecdc.plot_comparison(['AT', *geoIds ], log=False, ndays=45, field='cases_weekly', roll_days=1);

geoIds = EU_med.sort_values(['dateRep', 'deaths_weekly'], ascending=[False,False]).head(5).geoId.to_list()
ecdc.plot_comparison(['AT', *geoIds ], log=False, ndays=45, field='deaths_weekly', roll_days=1);



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Plot by weekday

In [12]:
def plot_cases_by_day_of_the_week(selection, col='cases_weekly', num_weeks_history=15, title=''):
    f, ax = plt.subplots(figsize=(9, 5))
    selection = selection.groupby('dateRep', as_index=False).agg({col:'sum'})
    grouped = selection.groupby(selection.dateRep.dt.month,as_index=False).agg(
                                        ('sum', 'max', 'min', 'median',
                                         'mean', latest))

    ax.scatter(grouped.index,
               grouped[(col, 'max')],
               label='max',
               marker='D',
               s=50,
               c='k')
    ax.scatter(grouped.index,
               grouped[(col, 'median')],
               label='median',
               marker='D')
    ax.scatter(grouped.index,
               grouped[(col, 'min')],
               label='min',
               marker='D')

    last_n_weeks = selection.sort_values(
        'dateRep').dateRep.dt.month.unique()[-num_weeks_history:]
    print(last_n_weeks)
    viridis = cm.get_cmap('viridis', num_weeks_history)

    for i, w in enumerate(last_n_weeks):
        df = selection[selection.dateRep.dt.month == w]
        ax.plot(df.dateRep,
                df[col],
                label=f'Month #{w}',
                marker='o',
                linestyle='--',
                alpha=1,
               color=viridis.colors[i])

    if title:
        plt.title(title)
            

#     pretty_plot(ax, log=False, num_x_locators=7)
    if num_weeks_history<20:
        plt.legend(bbox_to_anchor=(1, 1), ncol=1, loc='upper left');
        
    plt.tight_layout()

    return ax

NUM_WEEKS = 15

In [14]:
plot_cases_by_day_of_the_week(ecdc.select_country('IN'), col='cases_weekly', title='India - cases',num_weeks_history=NUM_WEEKS);
plot_cases_by_day_of_the_week(ecdc.select_country('IN'), col='deaths_weekly', title='India - deaths',num_weeks_history=NUM_WEEKS);


<IPython.core.display.Javascript object>

[ 1  2  3  4  5  6  7  8  9 10 11 12]


<IPython.core.display.Javascript object>

[ 1  2  3  4  5  6  7  8  9 10 11 12]


In [15]:
plot_cases_by_day_of_the_week(ecdc.select_country('US'), col='cases', title='USA - cases', num_weeks_history=NUM_WEEKS )
plot_cases_by_day_of_the_week(ecdc.select_country('US'), col='deaths', title='USA - deaths',num_weeks_history=NUM_WEEKS)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [16]:
selection = ecdc.df[ecdc.df.continentExp=='Europe']
plot_cases_by_day_of_the_week(selection, title='Europe - cases', col='cases',num_weeks_history=NUM_WEEKS)
plot_cases_by_day_of_the_week(selection, title='Europe - deaths', col='deaths',num_weeks_history=NUM_WEEKS)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [17]:
plot_cases_by_day_of_the_week(ecdc.select_country('DE'), col='cases', title='Germany - cases',num_weeks_history=NUM_WEEKS )
plot_cases_by_day_of_the_week(ecdc.select_country('DE'), col='deaths', title='Germany - deaths',num_weeks_history=NUM_WEEKS)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [18]:
plot_cases_by_day_of_the_week(ecdc.select_country('NL'), col='cases', title='Netherlands - cases' ,num_weeks_history=30)
plot_cases_by_day_of_the_week(ecdc.select_country('NL'), col='deaths', title='Netherlands - deaths',num_weeks_history=40)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [19]:
plot_cases_by_day_of_the_week(ecdc.select_country('UK'), col='cases', title='UK - cases' )
plot_cases_by_day_of_the_week(ecdc.select_country('UK'), col='deaths', title='UK - deaths')


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [20]:
plot_cases_by_day_of_the_week(ecdc.select_country('AT'), col='cases', title='Austria - cases',num_weeks_history=NUM_WEEKS )
plot_cases_by_day_of_the_week(ecdc.select_country('AT'), col='deaths', title='Austria - deaths',num_weeks_history=NUM_WEEKS)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist

In [21]:
plot_cases_by_day_of_the_week(ecdc.select_country('IT'), col='cases', title='Italy - cases', num_weeks_history=NUM_WEEKS)
plot_cases_by_day_of_the_week(ecdc.select_country('IT'), col='deaths', title='Italy - deaths',num_weeks_history=NUM_WEEKS)


<IPython.core.display.Javascript object>

SpecificationError: Column(s) ['cases'] do not exist