In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
import matplotlib.pyplot as plt
%matplotlib inline
import dateutil.parser
import math
from datetime import date
from datetime import datetime, timedelta
from vega_datasets import data
import altair as alt
alt.data_transformers.disable_max_rows()
alt.data_transformers.enable('json')
import seaborn as sns
import re

In [2]:
# Reference day for the whole analysis: the local calendar date at run time.
today = datetime.now().date()
print(today.isoformat())

2020-10-08


In [3]:
# source: https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
# The CSV is fetched live, so the contents depend on the day the cell is run.
ecdc_csv_url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv"
ecdc = pd.read_csv(ecdc_csv_url, encoding="utf-8")
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000
0,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396
1,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362
2,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302
3,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721
4,04/10/2020,4,10,2020,7,4,Afghanistan,AF,AFG,38041757.0,Asia,0.665059


In [4]:
# Distinct country/territory codes; rows without a code are dropped first.
countries_list = ecdc['countryterritoryCode'].dropna().unique()


In [5]:
# Number of distinct country/territory codes collected above.
len(countries_list)

209

# Convert the report dates to datetime

In [6]:
# dateRep arrives as DD/MM/YYYY; rewrite it as ISO YYYY-MM-DD text.
# Both pattern and replacement are raw strings so that the regex backslashes
# (\d, \g<n>) are never interpreted as Python string escapes -- the non-raw
# '\g' form triggers an invalid-escape warning on current Python versions.
# Strings that do not match the pattern are passed through unchanged.
ecdc['parsed_date'] = [
    re.sub(r'(\d\d)\/(\d\d)\/(\d\d\d\d)', r'\g<3>-\g<2>-\g<1>', date_rep)
    for date_rep in ecdc['dateRep']
]
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000,parsed_date
0,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396,2020-10-08
1,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362,2020-10-07
2,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302,2020-10-06
3,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721,2020-10-05
4,04/10/2020,4,10,2020,7,4,Afghanistan,AF,AFG,38041757.0,Asia,0.665059,2020-10-04


In [7]:
# Turn the ISO strings into real timestamps with one vectorised call.
# Unlike a per-row datetime.strptime, this cell can be re-run safely: a column
# that already holds datetimes is returned as it is instead of raising.
ecdc['parsed_date'] = pd.to_datetime(ecdc['parsed_date'], format='%Y-%m-%d')

In [8]:
# check whether latest date is today
# Compare the maximum of the whole column rather than the row labelled 0, so
# the check does not depend on the row order of the download.
ecdc['parsed_date'].max() == pd.Timestamp(today)

True

# Classify countries

In [9]:
# Trend-class labels. The numeric prefix makes them sort from the strongest
# increase to "no cases in either week".
moredouble, more, samesame, less, lesshalf, zerozero = (
    '01_moredouble',
    '02_more',
    '03_samesame',
    '04_less',
    '05_lesshalf',
    '06_zerozero',
)

In [10]:
# Absolute tolerance on the weekly totals used by the classification cells:
# a difference smaller than this between the two weeks counts as "samesame",
# a rise/fall of at least this much as "more"/"less" (unless one of the
# doubling, halving or zero rules applies first).
threshold_samesame = 7

In [11]:
def classify_trend(this_week, last_week, threshold):
    """Return the trend-class label for a pair of weekly totals.

    The rules are tried top to bottom and the first match wins, so their
    order is part of the definition (verbal description: see README.md).

    Parameters
    ----------
    this_week : number
        Total for the most recent seven days.
    last_week : number
        Total for the seven days before that.
    threshold : number
        Absolute difference below which two totals count as the same.

    Returns
    -------
    str
        One of the module-level labels ``moredouble``, ``more``, ``samesame``,
        ``less``, ``lesshalf``, ``zerozero``, or ``'youMISSEDsth'`` if no rule
        matched.
    """
    # plain `and` is used throughout: these are scalar comparisons, not masks
    if last_week > 0 and this_week > last_week * 2:
        return moredouble
    if this_week >= last_week + threshold:
        return more
    if last_week == 0 and this_week != 0:
        # any count after a week of zero is a rise, even below the threshold
        return more
    if last_week == 0 and this_week == 0:
        return zerozero
    if this_week == last_week:
        return samesame
    if this_week == 0 and last_week != 0:
        # dropping to zero is treated as the strongest decrease
        return lesshalf
    if last_week + threshold > this_week > last_week - threshold:
        return samesame
    if this_week < last_week / 2:
        return lesshalf
    if this_week <= last_week - threshold:
        return less
    # safety net; the cell that counts 'youMISSEDsth' rows checks for this
    return 'youMISSEDsth'


list_country_dicts = []
pd_today = pd.to_datetime(today)
# The window boundaries are identical for every country, so they are computed
# once instead of on each pass through the loop.
seven_days_ago = pd.to_datetime(today - timedelta(7))
fourteen_day_ago = pd.to_datetime(today - timedelta(14))
for country in countries_list:
    # filter dataframe for one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']
    # "this week": after seven_days_ago, up to and including today
    in_this_week = (report_dates <= pd_today) & (report_dates > seven_days_ago)
    # "last week": after fourteen_day_ago, up to and including seven_days_ago
    in_last_week = (report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)
    cases_this_week = filtered_aggregates.loc[in_this_week, 'cases'].sum()
    cases_last_week = filtered_aggregates.loc[in_last_week, 'cases'].sum()
    # key order fixes the column order of the frame built from these records
    list_country_dicts.append({
        'country_ISO': country,
        'today': today,
        'seven_days_ago': seven_days_ago,
        'cases_this_week': cases_this_week,
        'fourteen_days_ago': fourteen_day_ago,
        'cases_last_week': cases_last_week,
        'trend_class': classify_trend(cases_this_week, cases_last_week, threshold_samesame),
    })

In [12]:
# One row per country: window boundaries, both weekly case totals and the
# assigned trend class.
two_week_trend = pd.DataFrame(list_country_dicts)
two_week_trend.head()

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
0,AFG,2020-10-08,2020-10-01,348,2020-09-24,98,01_moredouble
1,ALB,2020-10-08,2020-10-01,1081,2020-09-24,862,02_more
2,DZA,2020-10-08,2020-10-01,990,2020-09-24,1130,04_less
3,AND,2020-10-08,2020-10-01,518,2020-09-24,297,02_more
4,AGO,2020-10-08,2020-10-01,753,2020-09-24,736,02_more


In [13]:
# Number of countries in each trend class.
two_week_trend['trend_class'].value_counts()

02_more          92
04_less          52
03_samesame      23
05_lesshalf      15
01_moredouble    14
06_zerozero      13
Name: trend_class, dtype: int64

# Check for unclassified countries and other classes

In [14]:
# Count the rows that fell through to the fallback label; this should be 0.
int((two_week_trend['trend_class'] == 'youMISSEDsth').sum())

0

In [15]:
# Spot-check the countries classified as "less".
is_less = two_week_trend['trend_class'] == '04_less'
two_week_trend.loc[is_less]

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
2,DZA,2020-10-08,2020-10-01,990,2020-09-24,1130,04_less
9,ABW,2020-10-08,2020-10-01,145,2020-09-24,242,04_less
14,BHR,2020-10-08,2020-10-01,3068,2020-09-24,4462,04_less
18,BEL,2020-10-08,2020-10-01,12379,2020-09-24,13707,04_less
23,BOL,2020-10-08,2020-10-01,2395,2020-09-24,3321,04_less
34,CMR,2020-10-08,2020-10-01,86,2020-09-24,148,04_less
36,CPV,2020-10-08,2020-10-01,600,2020-09-24,612,04_less
40,CHL,2020-10-08,2020-10-01,11449,2020-09-24,13088,04_less
46,CIV,2020-10-08,2020-10-01,211,2020-09-24,294,04_less
48,CUB,2020-10-08,2020-10-01,301,2020-09-24,327,04_less


In [16]:
# Spot-check the countries classified as "samesame".
is_samesame = two_week_trend['trend_class'] == '03_samesame'
two_week_trend.loc[is_samesame]

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
6,ATG,2020-10-08,2020-10-01,7,2020-09-24,4,03_samesame
22,BTN,2020-10-08,2020-10-01,22,2020-09-24,21,03_samesame
24,BES,2020-10-08,2020-10-01,39,2020-09-24,37,03_samesame
33,KHM,2020-10-08,2020-10-01,4,2020-09-24,2,03_samesame
37,CYM,2020-10-08,2020-10-01,2,2020-09-24,1,03_samesame
38,CAF,2020-10-08,2020-10-01,23,2020-09-24,27,03_samesame
43,COM,2020-10-08,2020-10-01,12,2020-09-24,9,03_samesame
54,DJI,2020-10-08,2020-10-01,7,2020-09-24,9,03_samesame
62,EST,2020-10-08,2020-10-01,344,2020-09-24,338,03_samesame
63,SWZ,2020-10-08,2020-10-01,135,2020-09-24,139,03_samesame


In [17]:
# Spot-check the first 30 countries classified as "more".
is_more = two_week_trend['trend_class'] == '02_more'
two_week_trend.loc[is_more].head(30)

Unnamed: 0,country_ISO,today,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
1,ALB,2020-10-08,2020-10-01,1081,2020-09-24,862,02_more
3,AND,2020-10-08,2020-10-01,518,2020-09-24,297,02_more
4,AGO,2020-10-08,2020-10-01,753,2020-09-24,736,02_more
7,ARG,2020-10-08,2020-10-01,89914,2020-09-24,86189,02_more
8,ARM,2020-10-08,2020-10-01,3396,2020-09-24,2482,02_more
10,AUS,2020-10-08,2020-10-01,119,2020-09-24,90,02_more
11,AUT,2020-10-08,2020-10-01,5629,2020-09-24,5195,02_more
12,AZE,2020-10-08,2020-10-01,884,2020-09-24,705,02_more
13,BHS,2020-10-08,2020-10-01,590,2020-09-24,505,02_more
15,BGD,2020-10-08,2020-10-01,9672,2020-09-24,9635,02_more


# Count countries per category

In [18]:
# Tally the countries per trend class, then flatten the tally into a
# two-column frame (trend_class, country_count_on_today) for plotting.
case_class_counts = two_week_trend['trend_class'].value_counts()
case_class_counts = case_class_counts.rename_axis('trend_class')
current_status = case_class_counts.reset_index(name='country_count_on_today')

In [19]:
# Display only: the sorted copy is not assigned, so current_status keeps its
# original row order.
current_status.sort_values(by='trend_class')

Unnamed: 0,trend_class,country_count_on_today
4,01_moredouble,14
0,02_more,92
2,03_samesame,23
1,04_less,52
3,05_lesshalf,15
5,06_zerozero,13


In [20]:
# Sanity check: the per-class counts should add up to the number of countries.
current_status['country_count_on_today'].sum()

209

In [21]:
# Fixed class order and one colour per class, so the stack always reads from
# "more than double" down to "zero in both weeks".
case_class_order = [moredouble, more, samesame, less, lesshalf, zerozero]
case_class_colours = ["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"]

# A single stacked bar: each segment's height is the number of countries in that class.
chart = (
    alt.Chart(current_status)
    .mark_bar(opacity=1, width=300)
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        color=alt.Color(
            'trend_class:N',
            scale=alt.Scale(domain=case_class_order, range=case_class_colours),
        ),
    )
    .properties(width=400, height=400)
)

# Text layer stacked the same way, printing each segment's country count.
numbers_labels = (
    alt.Chart(current_status)
    .mark_text(dx=-130, dy=13, color='#002D5A')
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        detail='trend_class:N',
        text=alt.Text('sum(country_count_on_today):Q', format='.0f'),
    )
)

labeled_chart = chart + numbers_labels
# Hide the frame around the plotting area.
labeled_chart.configure_view(strokeOpacity=0)


# Now do the same for deaths

In [22]:
# Build one record per country with the deaths reported in the last seven days,
# the seven days before that, and the resulting trend class.
list_deaths = []
pd_today = pd.to_datetime(today)
# The window boundaries are identical for every country, so they are computed
# once instead of on each pass through the loop.
seven_days_ago = pd.to_datetime(today - timedelta(7))
fourteen_day_ago = pd.to_datetime(today - timedelta(14))
for country in countries_list:
    # filter dataframe for one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']
    # "this week": after seven_days_ago, up to and including today
    filtered_aggregates_this_week = filtered_aggregates[(report_dates <= pd_today) & (report_dates > seven_days_ago)]
    deaths_this_week = filtered_aggregates_this_week['deaths'].sum()
    # "last week": after fourteen_day_ago, up to and including seven_days_ago
    filtered_aggregates_last_week = filtered_aggregates[(report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)]
    deaths_last_week = filtered_aggregates_last_week['deaths'].sum()

    # classify country -- verbal description: see README.md
    # The rules are tried top to bottom and the first match wins. They compare
    # scalars, so plain `and` is used rather than the element-wise `&`.
    if deaths_last_week > 0 and deaths_this_week > deaths_last_week * 2:
        trend_class = moredouble
    elif deaths_this_week >= deaths_last_week + threshold_samesame:
        trend_class = more
    elif deaths_last_week == 0 and deaths_this_week != 0:
        # any count after a week of zero is a rise, even below the threshold
        trend_class = more
    elif deaths_last_week == 0 and deaths_this_week == 0:
        trend_class = zerozero
    elif deaths_this_week == deaths_last_week:
        trend_class = samesame
    elif deaths_this_week == 0 and deaths_last_week != 0:
        # dropping to zero is treated as the strongest decrease
        trend_class = lesshalf
    elif deaths_last_week + threshold_samesame > deaths_this_week > deaths_last_week - threshold_samesame:
        trend_class = samesame
    elif deaths_this_week < deaths_last_week / 2:
        trend_class = lesshalf
    elif deaths_this_week <= deaths_last_week - threshold_samesame:
        trend_class = less
    else:
        # safety net; a later cell counts rows carrying this label
        trend_class = 'youMISSEDsth'

    # add it all to the list of country dicts
    # (key order fixes the column order of the frame built from these records)
    list_deaths.append({
        'country_ISO': country,
        'today': today,
        'seven_days_ago': seven_days_ago,
        'deaths_this_week': deaths_this_week,
        'fourteen_days_ago': fourteen_day_ago,
        'deaths_last_week': deaths_last_week,
        'trend_class': trend_class,
    })

In [23]:
# One row per country: window boundaries, both weekly death totals and the
# assigned trend class.
death_trend = pd.DataFrame(list_deaths)
death_trend.head()

Unnamed: 0,country_ISO,today,seven_days_ago,deaths_this_week,fourteen_days_ago,deaths_last_week,trend_class
0,AFG,2020-10-08,2020-10-01,12,2020-09-24,7,03_samesame
1,ALB,2020-10-08,2020-10-01,20,2020-09-24,17,03_samesame
2,DZA,2020-10-08,2020-10-01,45,2020-09-24,36,02_more
3,AND,2020-10-08,2020-10-01,0,2020-09-24,0,06_zerozero
4,AGO,2020-10-08,2020-10-01,29,2020-09-24,27,03_samesame


In [24]:
# Number of countries in each trend class, based on deaths.
death_trend['trend_class'].value_counts()

03_samesame      67
06_zerozero      61
02_more          32
04_less          25
05_lesshalf      13
01_moredouble    11
Name: trend_class, dtype: int64

In [25]:
# Count the rows that fell through to the fallback label; this should be 0.
int((death_trend['trend_class'] == 'youMISSEDsth').sum())

0

In [26]:
# Tally the countries per trend class, then flatten the tally into a
# two-column frame (trend_class, country_count_on_today) for plotting.
death_class_tally = death_trend['trend_class'].value_counts()
death_class_tally = death_class_tally.rename_axis('trend_class')
death_status = death_class_tally.reset_index(name='country_count_on_today')
# Display only: the sorted copy is not stored.
death_status.sort_values(by='trend_class')

Unnamed: 0,trend_class,country_count_on_today
5,01_moredouble,11
2,02_more,32
0,03_samesame,67
3,04_less,25
4,05_lesshalf,13
1,06_zerozero,61


In [27]:
# Fixed class order and one colour per class, so the stack always reads from
# "more than double" down to "zero in both weeks".
death_class_order = [moredouble, more, samesame, less, lesshalf, zerozero]
death_class_colours = ["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"]

# A single stacked bar: each segment's height is the number of countries in that class.
chart = (
    alt.Chart(death_status)
    .mark_bar(opacity=1, width=300)
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        color=alt.Color(
            'trend_class:N',
            scale=alt.Scale(domain=death_class_order, range=death_class_colours),
        ),
    )
    .properties(width=400, height=400)
)

# Text layer stacked the same way, printing each segment's country count.
numbers_labels = (
    alt.Chart(death_status)
    .mark_text(dx=-130, dy=13, color='#002D5A')
    .encode(
        y=alt.Y('sum(country_count_on_today):Q', stack='zero', axis=None),
        detail='trend_class:N',
        text=alt.Text('sum(country_count_on_today):Q', format='.0f'),
    )
)

labeled_chart = chart + numbers_labels
# Hide the frame around the plotting area.
labeled_chart.configure_view(strokeOpacity=0)
