In [1]:
# NOTE(review): dateutil.parser, math, vega_datasets.data, seaborn and
# matplotlib are imported but not referenced in the cells visible here --
# confirm whether later cells need them before removing anything.
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
import matplotlib.pyplot as plt
%matplotlib inline
import dateutil.parser
import math
from datetime import date
from datetime import datetime, timedelta
from vega_datasets import data
import altair as alt
# Altair data handling: lift the default row limit and switch to the 'json'
# data transformer (presumably so chart data is stored in a side file instead
# of being embedded in the notebook -- confirm against the Altair docs).
alt.data_transformers.disable_max_rows()
alt.data_transformers.enable('json')
import seaborn as sns
import re

In [2]:
# Reference date for the whole analysis: the "this week" / "last week"
# windows further down are counted backwards from this value.
today = date.today()
print(today)

2020-10-09


In [3]:
# source: https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
# Download the ECDC case-distribution CSV directly from the open-data server
# (needs network access; the content changes whenever ECDC republishes the file).
ecdc = pd.read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", encoding = "utf-8")
# Preview the first rows to confirm the expected columns are present.
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000
0,09/10/2020,9,10,2020,77,2,Afghanistan,AF,AFG,38041757.0,Asia,1.332746
1,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396
2,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362
3,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302
4,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721


In [4]:
# Distinct country/territory codes in order of first appearance; rows without
# a code (NaN) are left out.
countries_list = ecdc['countryterritoryCode'].dropna().unique()


In [5]:
# Number of distinct country/territory codes that will be classified below.
len(countries_list)

209

# Convert the report date to datetime

In [6]:
# ECDC reports dates as DD/MM/YYYY strings; rewrite them as ISO YYYY-MM-DD
# strings (still text at this point -- the next cell converts them to datetimes).
# Both pattern and replacement are raw strings so that the backslashes reach
# the regex engine untouched ('\g' is not a valid escape in a normal string).
ecdc['parsed_date'] = ecdc['dateRep'].str.replace(
    r'(\d\d)/(\d\d)/(\d\d\d\d)',
    r'\g<3>-\g<2>-\g<1>',
    regex=True,
)
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000,parsed_date
0,09/10/2020,9,10,2020,77,2,Afghanistan,AF,AFG,38041757.0,Asia,1.332746,2020-10-09
1,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396,2020-10-08
2,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362,2020-10-07
3,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302,2020-10-06
4,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721,2020-10-05


In [7]:
# Convert the ISO date strings into real datetimes so they can be compared
# with the window boundaries later on. pd.to_datetime is vectorised and,
# unlike a per-row strptime, does not fail when this cell is run a second
# time on an already converted column.
ecdc['parsed_date'] = pd.to_datetime(ecdc['parsed_date'], format='%Y-%m-%d')

In [8]:
# check whether latest date is today
# Compare the newest report date in the whole table rather than whatever
# happens to sit in row 0, so the check does not depend on the file's sort order.
ecdc['parsed_date'].max() == pd.to_datetime(today)

True

# Classify countries

In [9]:
# Labels for the week-over-week trend classes. The numeric prefixes make the
# labels sort in a fixed order when used as a category.
moredouble = '01_moredouble'  # this week more than twice last week
more = '02_more'              # clearly more than last week
samesame = '03_samesame'      # roughly unchanged
less = '04_less'              # clearly fewer than last week
lesshalf = '05_lesshalf'      # fewer than half of last week
zerozero = '06_zerozero'      # nothing reported in either week

In [10]:
# Margin used by the classification rules below: a weekly total that differs
# from the previous week's total by less than this amount is labelled
# "samesame" (unless one of the earlier rules in the chain already matched).
threshold_samesame = 7

In [11]:
# Build one summary record per country: reported cases in the last seven days
# ("this week"), in the seven days before that ("last week"), and the trend
# class derived from comparing the two.
list_country_dicts = []

# The window boundaries are identical for every country, so compute them once
# instead of on every loop iteration.
pd_today = pd.to_datetime(today)
seven_days_ago = pd.to_datetime(today - timedelta(7))
fourteen_day_ago = pd.to_datetime(today - timedelta(14))

for country in countries_list:
    # filter dataframe for one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']

    # "this week": seven_days_ago < date <= today
    filtered_aggregates_this_week = filtered_aggregates[
        (report_dates <= pd_today) & (report_dates > seven_days_ago)
    ]
    cases_this_week = filtered_aggregates_this_week['cases'].sum()

    # "last week": fourteen_day_ago < date <= seven_days_ago
    filtered_aggregates_last_week = filtered_aggregates[
        (report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)
    ]
    cases_last_week = filtered_aggregates_last_week['cases'].sum()

    # classify country -- verbal description: see README.md
    # The order of the rules matters: the first matching rule wins.
    if cases_last_week > 0 and cases_this_week > cases_last_week * 2:
        trend_class = moredouble
    elif cases_this_week >= cases_last_week + threshold_samesame:
        trend_class = more
    elif cases_last_week == 0 and cases_this_week != 0:
        # any cases after a week without cases count as "more"
        trend_class = more
    elif cases_last_week == 0 and cases_this_week == 0:
        trend_class = zerozero
    elif cases_this_week == cases_last_week:
        trend_class = samesame
    elif cases_this_week == 0 and cases_last_week != 0:
        # dropping to zero counts as "less than half", however small last week was
        trend_class = lesshalf
    elif cases_last_week + threshold_samesame > cases_this_week > cases_last_week - threshold_samesame:
        trend_class = samesame
    elif cases_this_week < cases_last_week / 2:
        trend_class = lesshalf
    elif cases_this_week <= cases_last_week - threshold_samesame:
        trend_class = less
    else:
        # safety net: marks any combination the rules above did not cover
        trend_class = 'youMISSEDsth'

    # Key order defines the column order of the resulting DataFrame.
    country_dict = {
        'country_ISO': country,
        'today': today,
        'seven_days_ago': seven_days_ago,
        'cases_this_week': cases_this_week,
        'fourteen_days_ago': fourteen_day_ago,
        'cases_last_week': cases_last_week,
        'trend_class': trend_class,
    }

    # add it all to the list of country dicts
    list_country_dicts.append(country_dict)

In [12]:
# One row per country code; the columns come from the keys of each country dict.
two_week_trend = pd.DataFrame(list_country_dicts)
two_week_trend.head()

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
0,AFG,2020-10-09,2020-10-02,408,2020-09-25,99,01_moredouble
1,ALB,2020-10-09,2020-10-02,1093,2020-09-25,885,02_more
2,DZA,2020-10-09,2020-10-02,968,2020-09-25,1111,04_less
3,AND,2020-10-09,2020-10-02,518,2020-09-25,297,02_more
4,AGO,2020-10-09,2020-10-02,891,2020-09-25,609,02_more


In [13]:
# Number of countries in each trend class (case counts), most frequent first.
two_week_trend['trend_class'].value_counts()

02_more          94
04_less          47
03_samesame      26
01_moredouble    19
06_zerozero      12
05_lesshalf      11
Name: trend_class, dtype: int64

# Check for unclassified countries and other classes

In [14]:
# Count the countries that ended up with the classifier's fallback label;
# anything other than 0 means the rule chain has a gap.
int((two_week_trend['trend_class'] == 'youMISSEDsth').sum())

0

In [15]:
# Spot-check the countries classified as "less". The shared label constant is
# used instead of a re-typed literal so this filter cannot drift from the
# labels the classifier assigns.
two_week_trend[two_week_trend['trend_class'] == less]

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
2,DZA,2020-10-09,2020-10-02,968,2020-09-25,1111,04_less
9,ABW,2020-10-09,2020-10-02,135,2020-09-25,242,04_less
13,BHS,2020-10-09,2020-10-02,493,2020-09-25,521,04_less
14,BHR,2020-10-09,2020-10-02,3048,2020-09-25,4360,04_less
18,BEL,2020-10-09,2020-10-02,14609,2020-09-25,15411,04_less
23,BOL,2020-10-09,2020-10-02,2253,2020-09-25,3098,04_less
27,BRA,2020-10-09,2020-10-02,181352,2020-09-25,189390,04_less
36,CPV,2020-10-09,2020-10-02,591,2020-09-25,647,04_less
40,CHL,2020-10-09,2020-10-02,11266,2020-09-25,13116,04_less
46,CIV,2020-10-09,2020-10-02,227,2020-09-25,254,04_less


In [16]:
# Spot-check the countries classified as "samesame". The shared label constant
# is used instead of a re-typed literal so this filter cannot drift from the
# labels the classifier assigns.
two_week_trend[two_week_trend['trend_class'] == samesame]

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
6,ATG,2020-10-09,2020-10-02,5,2020-09-25,8,03_samesame
10,AUS,2020-10-09,2020-10-02,110,2020-09-25,113,03_samesame
15,BGD,2020-10-09,2020-10-02,9605,2020-09-25,9603,03_samesame
22,BTN,2020-10-09,2020-10-02,22,2020-09-25,19,03_samesame
38,CAF,2020-10-09,2020-10-02,23,2020-09-25,25,03_samesame
43,COM,2020-10-09,2020-10-02,11,2020-09-25,10,03_samesame
54,DJI,2020-10-09,2020-10-02,6,2020-09-25,10,03_samesame
71,GAB,2020-10-09,2020-10-02,49,2020-09-25,50,03_samesame
72,GMB,2020-10-09,2020-10-02,33,2020-09-25,32,03_samesame
101,JEY,2020-10-09,2020-10-02,24,2020-09-25,20,03_samesame


In [17]:
# Spot-check the first 30 countries classified as "more". The shared label
# constant is used instead of a re-typed literal so this filter cannot drift
# from the labels the classifier assigns.
two_week_trend[two_week_trend['trend_class'] == more].head(30)

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
1,ALB,2020-10-09,2020-10-02,1093,2020-09-25,885,02_more
3,AND,2020-10-09,2020-10-02,518,2020-09-25,297,02_more
4,AGO,2020-10-09,2020-10-02,891,2020-09-25,609,02_more
7,ARG,2020-10-09,2020-10-02,91367,2020-09-25,86723,02_more
8,ARM,2020-10-09,2020-10-02,3623,2020-09-25,2599,02_more
11,AUT,2020-10-09,2020-10-02,6286,2020-09-25,5071,02_more
12,AZE,2020-10-09,2020-10-02,995,2020-09-25,623,02_more
17,BLR,2020-10-09,2020-10-02,2963,2020-09-25,2368,02_more
19,BLZ,2020-10-09,2020-10-02,347,2020-09-25,268,02_more
20,BEN,2020-10-09,2020-10-02,54,2020-09-25,32,02_more


# Count countries per category

In [18]:
# Collapse the per-country table into one row per trend class with the number
# of countries in it (columns: trend_class, country_count_on_today).
current_status = two_week_trend['trend_class'].value_counts().rename_axis('trend_class').reset_index(name='country_count_on_today')

In [19]:
# Display only (the result is not stored): order the rows by class label;
# the numeric label prefixes (01_ ... 06_) determine that order.
current_status.sort_values(by='trend_class')

Unnamed: 0,trend_class,country_count_on_today
3,01_moredouble,19
0,02_more,94
2,03_samesame,26
1,04_less,47
5,05_lesshalf,11
4,06_zerozero,12


In [20]:
# Sanity check: the class counts should add up to the number of classified
# countries, i.e. len(countries_list).
current_status['country_count_on_today'].sum()

209

In [21]:
# Single stacked bar showing how many countries fall into each case-trend
# class, with the class totals printed next to the bar.

# Fixed class order and one colour per class, from red (moredouble) to blue (zerozero).
trend_colour_scale = alt.Scale(
    domain=[moredouble, more, samesame, less, lesshalf, zerozero],
    range=["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"],
)

chart = (
    alt.Chart(current_status)
    .mark_bar(opacity=1, width=300)
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        color=alt.Color('trend_class:N', scale=trend_colour_scale),
    )
    .properties(width=400, height=400)
)

# Text layer: one number per class, stacked the same way as the bar segments
# so that each label lines up with its segment.
numbers_labels = (
    alt.Chart(current_status)
    .mark_text(dx=-130, dy=13, color='#002D5A')
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        detail='trend_class:N',
        text=alt.Text('sum(country_count_on_today):Q', format='.0f'),
    )
)

labeled_chart = chart + numbers_labels
# Hide the frame around the plot area.
labeled_chart.configure_view(strokeOpacity=0)


# Now do the same for deaths

In [22]:
# Build one summary record per country: reported deaths in the last seven days
# ("this week"), in the seven days before that ("last week"), and the trend
# class derived from comparing the two.
# NOTE(review): this mirrors the cases loop earlier in the notebook line for
# line; consider factoring both into one function that takes the column name.
list_deaths = []

# The window boundaries are identical for every country, so compute them once
# instead of on every loop iteration.
pd_today = pd.to_datetime(today)
seven_days_ago = pd.to_datetime(today - timedelta(7))
fourteen_day_ago = pd.to_datetime(today - timedelta(14))

for country in countries_list:
    # filter dataframe for one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']

    # "this week": seven_days_ago < date <= today
    filtered_aggregates_this_week = filtered_aggregates[
        (report_dates <= pd_today) & (report_dates > seven_days_ago)
    ]
    deaths_this_week = filtered_aggregates_this_week['deaths'].sum()

    # "last week": fourteen_day_ago < date <= seven_days_ago
    filtered_aggregates_last_week = filtered_aggregates[
        (report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)
    ]
    deaths_last_week = filtered_aggregates_last_week['deaths'].sum()

    # classify country -- verbal description: see README.md
    # The order of the rules matters: the first matching rule wins.
    if deaths_last_week > 0 and deaths_this_week > deaths_last_week * 2:
        trend_class = moredouble
    elif deaths_this_week >= deaths_last_week + threshold_samesame:
        trend_class = more
    elif deaths_last_week == 0 and deaths_this_week != 0:
        # any deaths after a week without deaths count as "more"
        trend_class = more
    elif deaths_last_week == 0 and deaths_this_week == 0:
        trend_class = zerozero
    elif deaths_this_week == deaths_last_week:
        trend_class = samesame
    elif deaths_this_week == 0 and deaths_last_week != 0:
        # dropping to zero counts as "less than half", however small last week was
        trend_class = lesshalf
    elif deaths_last_week + threshold_samesame > deaths_this_week > deaths_last_week - threshold_samesame:
        trend_class = samesame
    elif deaths_this_week < deaths_last_week / 2:
        trend_class = lesshalf
    elif deaths_this_week <= deaths_last_week - threshold_samesame:
        trend_class = less
    else:
        # safety net: marks any combination the rules above did not cover
        trend_class = 'youMISSEDsth'

    # Key order defines the column order of the resulting DataFrame.
    deaths_dict = {
        'country_ISO': country,
        'today': today,
        'seven_days_ago': seven_days_ago,
        'deaths_this_week': deaths_this_week,
        'fourteen_days_ago': fourteen_day_ago,
        'deaths_last_week': deaths_last_week,
        'trend_class': trend_class,
    }

    # add it all to the list of country dicts
    list_deaths.append(deaths_dict)

In [23]:
# One row per country code; the columns come from the keys of each deaths dict.
death_trend = pd.DataFrame(list_deaths)
death_trend.head()

Unnamed: 0,country_ISO,today,seven_days_ago,deaths_this_week,fourteen_days_ago,deaths_last_week,trend_class
0,AFG,2020-10-09,2020-10-02,14,2020-09-25,7,02_more
1,ALB,2020-10-09,2020-10-02,23,2020-09-25,18,03_samesame
2,DZA,2020-10-09,2020-10-02,42,2020-09-25,38,03_samesame
3,AND,2020-10-09,2020-10-02,1,2020-09-25,0,02_more
4,AGO,2020-10-09,2020-10-02,23,2020-09-25,24,03_samesame


In [24]:
# Number of countries in each trend class (death counts), most frequent first.
death_trend['trend_class'].value_counts()

03_samesame      61
06_zerozero      61
02_more          37
04_less          25
01_moredouble    13
05_lesshalf      12
Name: trend_class, dtype: int64

In [25]:
# Count the countries that ended up with the classifier's fallback label;
# anything other than 0 means the rule chain has a gap.
int((death_trend['trend_class'] == 'youMISSEDsth').sum())

0

In [26]:
# Collapse the per-country table into one row per trend class with the number
# of countries in it (columns: trend_class, country_count_on_today).
death_status = death_trend['trend_class'].value_counts().rename_axis('trend_class').reset_index(name='country_count_on_today')
# Display only (the sorted result is not stored): rows ordered by class label.
death_status.sort_values(by='trend_class')

Unnamed: 0,trend_class,country_count_on_today
4,01_moredouble,13
2,02_more,37
0,03_samesame,61
3,04_less,25
5,05_lesshalf,12
1,06_zerozero,61


In [27]:
# Single stacked bar showing how many countries fall into each death-trend
# class, with the class totals printed next to the bar.

# Fixed class order and one colour per class, from red (moredouble) to blue (zerozero).
trend_colour_scale = alt.Scale(
    domain=[moredouble, more, samesame, less, lesshalf, zerozero],
    range=["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"],
)

chart = (
    alt.Chart(death_status)
    .mark_bar(opacity=1, width=300)
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        color=alt.Color('trend_class:N', scale=trend_colour_scale),
    )
    .properties(width=400, height=400)
)

# Text layer: one number per class, stacked the same way as the bar segments
# so that each label lines up with its segment.
numbers_labels = (
    alt.Chart(death_status)
    .mark_text(dx=-130, dy=13, color='#002D5A')
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        detail='trend_class:N',
        text=alt.Text('sum(country_count_on_today):Q', format='.0f'),
    )
)

labeled_chart = chart + numbers_labels
# Hide the frame around the plot area.
labeled_chart.configure_view(strokeOpacity=0)
