In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
import matplotlib.pyplot as plt
%matplotlib inline
import dateutil.parser
import math
from datetime import date
from datetime import datetime, timedelta
import altair as alt
alt.data_transformers.disable_max_rows()
alt.data_transformers.enable('json')
import seaborn as sns
import re

In [2]:
# Reference date of this run; later cells use it to drop Fridays that lie in the future.
today = date.today()
print(today.isoformat())

2020-10-08


In [3]:
# source: https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
ECDC_CASE_DISTRIBUTION_URL = "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv"

# Download the daily case/death distribution (one row per country and reporting date).
ecdc = pd.read_csv(ECDC_CASE_DISTRIBUTION_URL, encoding="utf-8")
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000
0,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396
1,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362
2,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302
3,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721
4,04/10/2020,4,10,2020,7,4,Afghanistan,AF,AFG,38041757.0,Asia,0.665059


In [4]:
# Distinct ISO country codes, skipping rows whose code is missing (NaN)
countries_list = ecdc['countryterritoryCode'].dropna().unique()

In [5]:
# Rewrite the reporting date from DD/MM/YYYY to ISO YYYY-MM-DD (still a string here).
# Both pattern and replacement are raw strings: in a normal literal `\g` is an
# invalid escape sequence that newer Python versions warn about.
# The loop variable is not called `date` so it does not shadow the imported class.
ecdc['parsed_date'] = [
    re.sub(r'(\d\d)\/(\d\d)\/(\d\d\d\d)', r'\g<3>-\g<2>-\g<1>', date_rep)
    for date_rep in ecdc['dateRep']
]
ecdc.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000,parsed_date
0,08/10/2020,8,10,2020,68,1,Afghanistan,AF,AFG,38041757.0,Asia,1.172396,2020-10-08
1,07/10/2020,7,10,2020,62,2,Afghanistan,AF,AFG,38041757.0,Asia,1.059362,2020-10-07
2,06/10/2020,6,10,2020,145,5,Afghanistan,AF,AFG,38041757.0,Asia,1.08302,2020-10-06
3,05/10/2020,5,10,2020,44,0,Afghanistan,AF,AFG,38041757.0,Asia,0.780721,2020-10-05
4,04/10/2020,4,10,2020,7,4,Afghanistan,AF,AFG,38041757.0,Asia,0.665059,2020-10-04


In [6]:
# Convert the ISO date strings to datetime64 in a single vectorized call.
# Unlike a per-row strptime loop, this can be re-run on a column that has
# already been converted without raising a TypeError.
ecdc['parsed_date'] = pd.to_datetime(ecdc['parsed_date'], format='%Y-%m-%d')

# Find all Fridays

In [7]:
#https://stackoverflow.com/questions/2003870/how-can-i-select-all-of-the-sundays-for-a-year-using-python
from datetime import date, timedelta

def allfridays(year):
    """Yield every Friday of ``year`` in chronological order.

    Parameters
    ----------
    year : int
        Calendar year to enumerate.

    Yields
    ------
    datetime.date
        Each Friday whose date falls inside ``year``.
    """
    d = date(year, 1, 1)                          # January 1st
    # Days until the first Friday (weekday 4). The modulo keeps the offset in
    # 0..6: without it a year starting on Saturday/Sunday would step back into
    # the previous year and the loop below would yield nothing.
    d += timedelta(days=(4 - d.weekday()) % 7)    # First Friday
    while d.year == year:
        yield d
        d += timedelta(days=7)

In [8]:
# Sanity check: list every Friday of 2020, one per line
print(*allfridays(2020), sep='\n')

2020-01-03
2020-01-10
2020-01-17
2020-01-24
2020-01-31
2020-02-07
2020-02-14
2020-02-21
2020-02-28
2020-03-06
2020-03-13
2020-03-20
2020-03-27
2020-04-03
2020-04-10
2020-04-17
2020-04-24
2020-05-01
2020-05-08
2020-05-15
2020-05-22
2020-05-29
2020-06-05
2020-06-12
2020-06-19
2020-06-26
2020-07-03
2020-07-10
2020-07-17
2020-07-24
2020-07-31
2020-08-07
2020-08-14
2020-08-21
2020-08-28
2020-09-04
2020-09-11
2020-09-18
2020-09-25
2020-10-02
2020-10-09
2020-10-16
2020-10-23
2020-10-30
2020-11-06
2020-11-13
2020-11-20
2020-11-27
2020-12-04
2020-12-11
2020-12-18
2020-12-25


In [9]:
# Labels of the trend classes a country can fall into for a given week.
# The numeric prefixes presumably keep the classes in a fixed order when sorted.
(moredouble, more, samesame, less, lesshalf, zerozero) = (
    '01_moredouble',
    '02_more',
    '03_samesame',
    '04_less',
    '05_lesshalf',
    '06_zerozero',
)

In [10]:
# Absolute week-over-week difference used by the trend classification:
# a weekly total that differs from the previous week's by less than this
# amount is labelled "samesame" (unless an earlier rule already matched);
# a difference of at least this amount counts as "more" / "less".
threshold_samesame = 7

In [13]:
# Build one record per (country, Friday of 2020) holding the case totals of the
# week ending on that Friday, the totals of the week before, and a trend class.

# The window boundaries depend only on the Friday, not on the country, so they
# are computed once up front instead of once per country.
friday_windows = [
    (
        friday,
        pd.to_datetime(friday),                   # end of "this week" (inclusive)
        pd.to_datetime(friday - timedelta(7)),    # end of "last week" (inclusive)
        pd.to_datetime(friday - timedelta(14)),   # start of "last week" (exclusive)
    )
    for friday in allfridays(2020)
]

friday_data_list = []

for country in countries_list:
    # restrict the data to one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']
    for this_friday, pd_this_friday, seven_days_ago, fourteen_day_ago in friday_windows:
        # "this week": the seven days up to and including this Friday
        in_this_week = (report_dates <= pd_this_friday) & (report_dates > seven_days_ago)
        cases_this_week = filtered_aggregates.loc[in_this_week, 'cases'].sum()
        # "last week": the seven days before that
        in_last_week = (report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)
        cases_last_week = filtered_aggregates.loc[in_last_week, 'cases'].sum()

        # classify country; rules are checked in order and the first match wins.
        # Plain `and` is used because these are scalar comparisons, not arrays.
        if cases_last_week > 0 and cases_this_week > cases_last_week * 2:
            trend_class = moredouble
        elif cases_this_week >= cases_last_week + threshold_samesame:
            trend_class = more
        elif cases_last_week == 0 and cases_this_week != 0:
            trend_class = more
        elif cases_last_week == 0 and cases_this_week == 0:
            trend_class = zerozero
        elif cases_this_week == cases_last_week:
            trend_class = samesame
        elif cases_this_week == 0 and cases_last_week != 0:
            # NOTE(review): a drop to zero is labelled `less`, not `lesshalf` - confirm this is intended
            trend_class = less
        elif cases_last_week + threshold_samesame > cases_this_week > cases_last_week - threshold_samesame:
            trend_class = samesame
        elif cases_this_week < cases_last_week / 2:
            trend_class = lesshalf
        elif cases_this_week <= cases_last_week - threshold_samesame:
            trend_class = less
        else:
            # not expected to be reached for ordinary numbers; kept as a visible marker
            trend_class = 'youMISSEDsth'

        # add it all to the list of country dicts (key order defines the column order)
        friday_data_list.append({
            'country_ISO': country,
            'which_friday': this_friday,
            'seven_days_ago': seven_days_ago,
            'cases_this_week': cases_this_week,
            'fourteen_days_ago': fourteen_day_ago,
            'cases_last_week': cases_last_week,
            'trend_class': trend_class,
        })


In [14]:
# One row per (country, Friday) record collected above
all_week_trend = pd.DataFrame(data=friday_data_list)
all_week_trend.head(n=5)

Unnamed: 0,country_ISO,which_friday,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
0,AFG,2020-01-03,2019-12-27,0,2019-12-20,0,06_zerozero
1,AFG,2020-01-10,2020-01-03,0,2019-12-27,0,06_zerozero
2,AFG,2020-01-17,2020-01-10,0,2020-01-03,0,06_zerozero
3,AFG,2020-01-24,2020-01-17,0,2020-01-10,0,06_zerozero
4,AFG,2020-01-31,2020-01-24,0,2020-01-17,0,06_zerozero


In [15]:
# Keep only the Fridays that are not later than `today`
not_in_future = all_week_trend['which_friday'] <= today
all_week_trend_data = all_week_trend.loc[not_in_future]
all_week_trend_data.head()

Unnamed: 0,country_ISO,which_friday,seven_days_ago,cases_this_week,fourteen_days_ago,cases_last_week,trend_class
0,AFG,2020-01-03,2019-12-27,0,2019-12-20,0,06_zerozero
1,AFG,2020-01-10,2020-01-03,0,2019-12-27,0,06_zerozero
2,AFG,2020-01-17,2020-01-10,0,2020-01-03,0,06_zerozero
3,AFG,2020-01-24,2020-01-17,0,2020-01-10,0,06_zerozero
4,AFG,2020-01-31,2020-01-24,0,2020-01-17,0,06_zerozero


In [16]:
# Reshape the data for plotting: one row per (Friday, trend class) holding the
# number of countries that fall into that class on that Friday.
weekly_counts = []

# Visit each distinct Friday exactly once. Looping over the raw column would
# visit every Friday once per country and append the same counts repeatedly,
# which inflates the summed bar heights in the chart.
for friday in all_week_trend_data['which_friday'].unique():
    weekly_trend = all_week_trend_data[all_week_trend_data['which_friday'] == friday]
    weekly_trend_df = (
        weekly_trend['trend_class']
        .value_counts()
        .rename_axis('trend_class')
        .reset_index(name='#_countries_in_class')
    )
    weekly_trend_df['time'] = friday
    weekly_counts.append(weekly_trend_df)

# Concatenate once at the end instead of growing a frame inside the loop;
# fall back to an empty frame with the expected columns when there is no data.
if weekly_counts:
    value_counts = pd.concat(weekly_counts)
else:
    value_counts = pd.DataFrame(columns=['trend_class', '#_countries_in_class', 'time'])

In [17]:
# Preview the first rows of the per-Friday class counts
value_counts.head()

Unnamed: 0,trend_class,#_countries_in_class,time
0,06_zerozero,208,2020-01-03
1,02_more,1,2020-01-03
0,06_zerozero,208,2020-01-10
1,05_lesshalf,1,2020-01-10
0,06_zerozero,206,2020-01-17


In [18]:
# ISO calendar week number of each Friday (element 1 of the isocalendar() tuple)
iso_week_numbers = value_counts['time'].map(lambda friday: friday.isocalendar()[1])
value_counts['calendar_week'] = iso_week_numbers.to_numpy()

In [19]:
# Stacked bar per calendar week: number of countries in each trend class.
# The colour scale pairs each class label with one fixed colour, in this order.
trend_class_domain = [moredouble, more, samesame, less, lesshalf, zerozero]
trend_class_colors = ["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"]
trend_color_scale = alt.Scale(domain=trend_class_domain, range=trend_class_colors)

bars = (
    alt.Chart(value_counts)
    .mark_bar()
    .encode(
        x=alt.X('calendar_week:N'),
        y=alt.Y('sum(#_countries_in_class):Q', stack='zero', axis=None),
        color=alt.Color('trend_class:N', scale=trend_color_scale),
    )
)

bars.properties(width=400, height=400)

# Now do the same for deaths

In [20]:
# Build one record per (country, Friday of 2020) holding the death totals of the
# week ending on that Friday, the totals of the week before, and a trend class.

# The window boundaries depend only on the Friday, not on the country, so they
# are computed once up front instead of once per country.
friday_windows = [
    (
        friday,
        pd.to_datetime(friday),                   # end of "this week" (inclusive)
        pd.to_datetime(friday - timedelta(7)),    # end of "last week" (inclusive)
        pd.to_datetime(friday - timedelta(14)),   # start of "last week" (exclusive)
    )
    for friday in allfridays(2020)
]

friday_deaths_list = []

for country in countries_list:
    # restrict the data to one specific country at a time
    filtered_aggregates = ecdc[ecdc['countryterritoryCode'] == country]
    report_dates = filtered_aggregates['parsed_date']
    for this_friday, pd_this_friday, seven_days_ago, fourteen_day_ago in friday_windows:
        # "this week": the seven days up to and including this Friday
        in_this_week = (report_dates <= pd_this_friday) & (report_dates > seven_days_ago)
        deaths_this_week = filtered_aggregates.loc[in_this_week, 'deaths'].sum()
        # "last week": the seven days before that
        in_last_week = (report_dates <= seven_days_ago) & (report_dates > fourteen_day_ago)
        deaths_last_week = filtered_aggregates.loc[in_last_week, 'deaths'].sum()

        # classify country; rules are checked in order and the first match wins.
        # Plain `and` is used because these are scalar comparisons, not arrays.
        if deaths_last_week > 0 and deaths_this_week > deaths_last_week * 2:
            trend_class = moredouble
        elif deaths_this_week >= deaths_last_week + threshold_samesame:
            trend_class = more
        elif deaths_last_week == 0 and deaths_this_week != 0:
            trend_class = more
        elif deaths_last_week == 0 and deaths_this_week == 0:
            trend_class = zerozero
        elif deaths_this_week == deaths_last_week:
            trend_class = samesame
        elif deaths_this_week == 0 and deaths_last_week != 0:
            # NOTE(review): a drop to zero is labelled `less`, not `lesshalf` - confirm this is intended
            trend_class = less
        elif deaths_last_week + threshold_samesame > deaths_this_week > deaths_last_week - threshold_samesame:
            trend_class = samesame
        elif deaths_this_week < deaths_last_week / 2:
            trend_class = lesshalf
        elif deaths_this_week <= deaths_last_week - threshold_samesame:
            # must compare this week's *deaths*; comparing a case total here
            # would misclassify moderate decreases in deaths
            trend_class = less
        else:
            # not expected to be reached for ordinary numbers; kept as a visible marker
            trend_class = 'youMISSEDsth'

        # add it all to the list of country dicts (key order defines the column order)
        friday_deaths_list.append({
            'country_ISO': country,
            'which_friday': this_friday,
            'seven_days_ago': seven_days_ago,
            'deaths_this_week': deaths_this_week,
            'fourteen_days_ago': fourteen_day_ago,
            'deaths_last_week': deaths_last_week,
            'trend_class': trend_class,
        })

In [21]:
# One row per (country, Friday) death record collected above
all_week_deaths_trend = pd.DataFrame(data=friday_deaths_list)
all_week_deaths_trend.head(n=5)

Unnamed: 0,country_ISO,which_friday,seven_days_ago,deaths_this_week,fourteen_days_ago,deaths_last_week,trend_class
0,AFG,2020-01-03,2019-12-27,0,2019-12-20,0,06_zerozero
1,AFG,2020-01-10,2020-01-03,0,2019-12-27,0,06_zerozero
2,AFG,2020-01-17,2020-01-10,0,2020-01-03,0,06_zerozero
3,AFG,2020-01-24,2020-01-17,0,2020-01-10,0,06_zerozero
4,AFG,2020-01-31,2020-01-24,0,2020-01-17,0,06_zerozero


In [22]:
# Keep only the Fridays that are not later than `today`
deaths_not_in_future = all_week_deaths_trend['which_friday'] <= today
all_week_trend_deaths = all_week_deaths_trend.loc[deaths_not_in_future]
all_week_trend_deaths.head()

Unnamed: 0,country_ISO,which_friday,seven_days_ago,deaths_this_week,fourteen_days_ago,deaths_last_week,trend_class
0,AFG,2020-01-03,2019-12-27,0,2019-12-20,0,06_zerozero
1,AFG,2020-01-10,2020-01-03,0,2019-12-27,0,06_zerozero
2,AFG,2020-01-17,2020-01-10,0,2020-01-03,0,06_zerozero
3,AFG,2020-01-24,2020-01-17,0,2020-01-10,0,06_zerozero
4,AFG,2020-01-31,2020-01-24,0,2020-01-17,0,06_zerozero


In [23]:
# Reshape the deaths data for plotting: one row per (Friday, trend class)
# holding the number of countries that fall into that class on that Friday.
weekly_death_counts = []

# Visit each distinct Friday exactly once. Looping over the raw column would
# visit every Friday once per country and append the same counts repeatedly,
# which inflates the summed bar heights in the chart.
for friday in all_week_trend_deaths['which_friday'].unique():
    weekly_trend = all_week_trend_deaths[all_week_trend_deaths['which_friday'] == friday]
    weekly_trend_df = (
        weekly_trend['trend_class']
        .value_counts()
        .rename_axis('trend_class')
        .reset_index(name='#_countries_in_class')
    )
    weekly_trend_df['time'] = friday
    weekly_death_counts.append(weekly_trend_df)

# Concatenate once at the end instead of growing a frame inside the loop;
# fall back to an empty frame with the expected columns when there is no data.
if weekly_death_counts:
    value_counts = pd.concat(weekly_death_counts)
else:
    value_counts = pd.DataFrame(columns=['trend_class', '#_countries_in_class', 'time'])

In [24]:
# Preview the first rows of the per-Friday class counts for deaths
value_counts.head()

Unnamed: 0,trend_class,#_countries_in_class,time
0,06_zerozero,209,2020-01-03
0,06_zerozero,209,2020-01-10
0,06_zerozero,208,2020-01-17
1,02_more,1,2020-01-17
0,06_zerozero,208,2020-01-24


In [25]:
# ISO calendar week number of each Friday (element 1 of the isocalendar() tuple)
iso_week_numbers = value_counts['time'].map(lambda friday: friday.isocalendar()[1])
value_counts['calendar_week'] = iso_week_numbers.to_numpy()

In [26]:
# Stacked bar per calendar week: number of countries in each deaths trend class.
# The colour scale pairs each class label with one fixed colour, in this order.
trend_class_domain = [moredouble, more, samesame, less, lesshalf, zerozero]
trend_class_colors = ["#BE232D", "#D44820", "#EE8C0A", "#F0AA00", "#F0C80F", "#00A5FF"]
trend_color_scale = alt.Scale(domain=trend_class_domain, range=trend_class_colors)

bars = (
    alt.Chart(value_counts)
    .mark_bar()
    .encode(
        x=alt.X('calendar_week:N'),
        y=alt.Y('sum(#_countries_in_class):Q', stack='zero', axis=None),
        color=alt.Color('trend_class:N', scale=trend_color_scale),
    )
)

bars.properties(width=400, height=400)