In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

# General Dataset

# Government Dataset and Testing Dataset

#### Testing Dataset

In [2]:
tests = pd.read_csv('data/testing.csv')

In [3]:
tests.head()

Unnamed: 0,Entity,Code,Date,Total tests per thousand
0,Argentina,ARG,"Apr 8, 2020",0.293
1,Argentina,ARG,"Apr 9, 2020",0.326
2,Argentina,ARG,"Apr 10, 2020",0.36
3,Argentina,ARG,"Apr 11, 2020",0.396
4,Argentina,ARG,"Apr 13, 2020",0.434


In [4]:
tests[['Entity', 'Date', 'Total tests per thousand']].isnull().values.any()

False

In [5]:
tests['Code'].isnull().values.any()

True

We can thus delete the `Code` columns from the dataset

In [6]:
tests = tests.drop(columns = ['Code'])

Check if no two dates are the same for a given Country

In [7]:
tests.groupby('Entity')['Date'].apply(lambda x: x.duplicated().any()).unique()

array([False])

In [92]:
selected_countries = ['United States', 'France', 'Belgium', 'Germany']

In [95]:
tests = tests[tests['Entity'].isin(selected_countries)]

In [96]:
tests

Unnamed: 0,Entity,Date,Total tests per thousand
239,Belgium,"Mar 1, 2020",0.005
240,Belgium,"Mar 2, 2020",0.029
241,Belgium,"Mar 3, 2020",0.070
242,Belgium,"Mar 4, 2020",0.127
243,Belgium,"Mar 5, 2020",0.188
...,...,...,...
3078,United States,"Apr 19, 2020",11.665
3079,United States,"Apr 20, 2020",12.081
3080,United States,"Apr 21, 2020",12.538
3081,United States,"Apr 22, 2020",13.485


#### Governement Dataset

In [9]:
gov_oxford = pd.read_csv('data/gov_oxford.csv')

In [10]:
gov_oxford

Unnamed: 0,CountryName,CountryCode,Date,S1_School closing,S1_IsGeneral,S1_Notes,S2_Workplace closing,S2_IsGeneral,S2_Notes,S3_Cancel public events,...,S11_Notes,S12_Testing framework,S12_Notes,S13_Contact tracing,S13_Notes,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyIndexForDisplay,Unnamed: 39
0,Aruba,ABW,20200101,0.0,,,0.0,,,0.0,...,,0.0,,0.0,,,,0.00,0.00,\t
1,Aruba,ABW,20200102,0.0,,,0.0,,,0.0,...,,0.0,,0.0,,,,0.00,0.00,\t
2,Aruba,ABW,20200103,0.0,,,0.0,,,0.0,...,,0.0,,0.0,,,,0.00,0.00,\t
3,Aruba,ABW,20200104,0.0,,,0.0,,,0.0,...,,0.0,,0.0,,,,0.00,0.00,\t
4,Aruba,ABW,20200105,0.0,,,0.0,,,0.0,...,,0.0,,0.0,,,,0.00,0.00,\t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17277,Kosovo,RKS,20200419,2.0,1.0,,2.0,0.0,,2.0,...,,1.0,,1.0,,510.0,12.0,90.48,90.48,\t
17278,Kosovo,RKS,20200420,2.0,1.0,,2.0,0.0,,2.0,...,,1.0,,1.0,,510.0,12.0,90.48,90.48,\t
17279,Kosovo,RKS,20200421,,,,,,,,...,,,,,,510.0,12.0,,90.48,\t
17280,Kosovo,RKS,20200422,,,,,,,,...,,,,,,510.0,12.0,,90.48,\t


In [11]:
gov_oxford = gov_oxford.drop(columns = ['S1_Notes', 'S1_IsGeneral', 'S2_Notes', 'S2_IsGeneral', 'S3_Notes', 'S3_IsGeneral', 'S4_Notes', 'S4_IsGeneral', 'S5_Notes', 'S5_IsGeneral', 'S6_Notes', 'S6_IsGeneral', 'S7_Notes', 'S8_Notes', 'S9_Notes', 'S10_Notes', 'S11_Notes', 'S12_Notes', 'S13_Notes', 'Unnamed: 39'])

We want every country to be compared on the same date range

In [12]:
len(gov_oxford['CountryName'].unique())

151

In [13]:
len(gov_oxford['Date'].unique())

115

In [14]:
gov_oxford[['CountryName', 'Date']].isnull().values.any()

False

In [15]:
full_dates = gov_oxford[['Date', 'CountryName']].groupby('Date').count()
full_dates = full_dates[full_dates['CountryName'] == len(gov_oxford['CountryName'].unique())]
full_dates.head()

Unnamed: 0_level_0,CountryName
Date,Unnamed: 1_level_1
20200319,151
20200320,151
20200321,151
20200322,151
20200323,151


In [16]:
full_dates = full_dates.reset_index().drop(columns = ['CountryName'])
gov_oxford_map = gov_oxford[gov_oxford['Date'].isin(full_dates)]

In [None]:
def convert_date_appearance():
    

In [17]:
gov_map_si = gov_oxford_map[['CountryName', 'Date', 'StringencyIndexForDisplay']]

In [18]:
gov_map_school = gov_oxford_map[['CountryName', 'Date', 'S1_School closing']]

In [19]:
gov_map_work = gov_oxford_map[['CountryName', 'Date', 'S2_Workplace closing']]

In [20]:
gov_map_events = gov_oxford_map[['CountryName', 'Date', 'S3_Cancel public events']]

In [21]:
gov_map_transport = gov_oxford_map[['CountryName', 'Date', 'S4_Close public transport']]

In [22]:
gov_map_info = gov_oxford_map[['CountryName', 'Date', 'S5_Public information campaigns']]

In [23]:
gov_map_movement = gov_oxford_map[['CountryName', 'Date', 'S6_Restrictions on internal movement']]

In [24]:
gov_map_travel_controls = gov_oxford_map[['CountryName', 'Date', 'S7_International travel controls']]

In [25]:
gov_map_fiscal = gov_oxford_map[['CountryName', 'Date', 'S8_Fiscal measures']]

In [26]:
gov_map_monetary = gov_oxford_map[['CountryName', 'Date', 'S9_Monetary measures']]

In [27]:
gov_map_health_investment = gov_oxford_map[['CountryName', 'Date', 'S10_Emergency investment in health care']]

In [28]:
gov_map_vaccine_investment = gov_oxford_map[['CountryName', 'Date', 'S11_Investment in Vaccines']]

In [29]:
gov_map_testing = gov_oxford_map[['CountryName', 'Date', 'S12_Testing framework']]

In [30]:
gov_map_contact_tracing = gov_oxford_map[['CountryName', 'Date', 'S13_Contact tracing']]

Country selection

In [31]:
selected_countries = ['United States', 'France', 'Germany', 'Belgium']

In [32]:
gov_oxford_select = gov_oxford[gov_oxford['CountryName'].isin(selected_countries)]

In [33]:
gov_oxford_select.head()

Unnamed: 0,CountryName,CountryCode,Date,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S7_International travel controls,S8_Fiscal measures,S9_Monetary measures,S10_Emergency investment in health care,S11_Investment in Vaccines,S12_Testing framework,S13_Contact tracing,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyIndexForDisplay
1265,Belgium,BEL,20200101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1266,Belgium,BEL,20200102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1267,Belgium,BEL,20200103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1268,Belgium,BEL,20200104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,Belgium,BEL,20200105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
gov_oxford_select[['CountryName', 'Date', 'ConfirmedCases', 'ConfirmedDeaths']].isnull().values.any()

False

In [35]:
gov_selected_cases_si = gov_oxford_select[['CountryName', 'Date', 'ConfirmedCases', 'StringencyIndexForDisplay']]

Non economical measures

In [36]:
non_economical_measures = ['CountryName', 'Date', 'ConfirmedCases', 'S1_School closing', 'S2_Workplace closing', 'S3_Cancel public events', 'S4_Close public transport', 'S5_Public information campaigns', 'S6_Restrictions on internal movement', 'S13_Contact tracing']

In [37]:
gov_selected_non_eco_measures = gov_oxford_select[non_economical_measures]

In [38]:
gov_selected_non_eco_measures.head()

Unnamed: 0,CountryName,Date,ConfirmedCases,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S13_Contact tracing
1265,Belgium,20200101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1266,Belgium,20200102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1267,Belgium,20200103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1268,Belgium,20200104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,Belgium,20200105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
gov_selected_non_eco_measures[['ConfirmedCases']].isnull().values.any()

False

In [40]:
gov_selected_non_eco_measures[gov_selected_non_eco_measures.isna().any(axis=1)]

Unnamed: 0,CountryName,Date,ConfirmedCases,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S13_Contact tracing
1375,Belgium,20200420,38496.0,,,,,,,
1376,Belgium,20200421,39983.0,,,,,,,
1377,Belgium,20200422,40956.0,,,,,,,
1378,Belgium,20200423,41889.0,,,,,,,
1379,Belgium,20200424,42797.0,,,,,,,
4169,Germany,20200417,133830.0,,,,,,2.0,
4170,Germany,20200418,137439.0,,,,,,2.0,
4171,Germany,20200419,139897.0,,,,,,2.0,
4172,Germany,20200420,141672.0,,,,,,,
4173,Germany,20200421,143457.0,,,,,,,


In [41]:
gov_selected_non_eco_measures[(gov_selected_non_eco_measures['CountryName'] == 'Germany') & (gov_selected_non_eco_measures['S6_Restrictions on internal movement'] == 2)]

Unnamed: 0,CountryName,Date,ConfirmedCases,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S13_Contact tracing
4141,Germany,20200320,14138.0,2.0,0.0,2.0,0.0,1.0,2.0,1.0
4142,Germany,20200321,18187.0,2.0,0.0,2.0,0.0,1.0,2.0,1.0
4143,Germany,20200322,21463.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4144,Germany,20200323,24774.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4145,Germany,20200324,29212.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4146,Germany,20200325,31554.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4147,Germany,20200326,36508.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4148,Germany,20200327,42288.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4149,Germany,20200328,48582.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4150,Germany,20200329,52547.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0


We thus can delete `nan` rows 

In [42]:
gov_selected_non_eco_measures = gov_selected_non_eco_measures.dropna()

In [43]:
gov_selected_non_eco_measures[(gov_selected_non_eco_measures['CountryName'] == 'Germany') & (gov_selected_non_eco_measures['S6_Restrictions on internal movement'] == 2)]

Unnamed: 0,CountryName,Date,ConfirmedCases,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S13_Contact tracing
4141,Germany,20200320,14138.0,2.0,0.0,2.0,0.0,1.0,2.0,1.0
4142,Germany,20200321,18187.0,2.0,0.0,2.0,0.0,1.0,2.0,1.0
4143,Germany,20200322,21463.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4144,Germany,20200323,24774.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4145,Germany,20200324,29212.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4146,Germany,20200325,31554.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4147,Germany,20200326,36508.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4148,Germany,20200327,42288.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4149,Germany,20200328,48582.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0
4150,Germany,20200329,52547.0,2.0,1.0,2.0,0.0,1.0,2.0,1.0


In [44]:
binary_format = {1:0}
binary_format_contact_tracing = {1: 2}

In [45]:
gov_selected_non_eco_measures[['S1_School closing', 'S2_Workplace closing', 'S3_Cancel public events', 'S4_Close public transport', 'S6_Restrictions on internal movement']] = gov_selected_non_eco_measures[['S1_School closing', 'S2_Workplace closing', 'S3_Cancel public events', 'S4_Close public transport', 'S6_Restrictions on internal movement']].replace(binary_format)
gov_selected_non_eco_measures[['S13_Contact tracing']] = gov_selected_non_eco_measures[['S13_Contact tracing']].replace(binary_format_contact_tracing)

In [46]:
def locate_changes(df, column_to_change):
    if df['value'] == False:
        df['value'] = df[column_to_change]
    else:
        df['value'] = np.nan
    return df

def delete_first_row(df):
    return df[1:]



School measure

In [82]:
school = gov_selected_non_eco_measures[['CountryName', 'Date', 'S1_School closing']]

In [83]:
school['value'] = school['S1_School closing'].eq(school['S1_School closing'].shift())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [84]:
school = school.apply(lambda x: locate_changes(x, 'S1_School closing'), axis = 1)

In [85]:
school = school.drop(columns = ['S1_School closing']).dropna()

In [86]:
school_text_measures = {0: 'Schools open', 2: 'School closed '}

In [87]:
school = school.replace(school_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

In [88]:
school

Unnamed: 0,CountryName,Date,value
1265,Belgium,20200101,Schools open
1338,Belgium,20200314,School closed
4062,Germany,20200101,Schools open
4118,Germany,20200226,School closed
5442,France,20200101,Schools open
5517,France,20200316,School closed
16248,United States,20200101,Schools open
16312,United States,20200305,School closed


Workplace measures

In [63]:
workplace = gov_selected_non_eco_measures[['CountryName', 'Date', 'S2_Workplace closing']]
workplace['value'] = workplace['S2_Workplace closing'].eq(workplace['S2_Workplace closing'].shift())
workplace = workplace.apply(lambda x: locate_changes(x, 'S2_Workplace closing'), axis = 1)
workplace = workplace.drop(columns = ['S2_Workplace closing']).dropna()
workplace_text_measures = {0: 'Workplace open', 2: 'Worplace closed '}
workplace = workplace.replace(workplace_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Public events measures

In [54]:
events = gov_selected_non_eco_measures[['CountryName', 'Date', 'S3_Cancel public events']]
events['value'] = events['S3_Cancel public events'].eq(events['S3_Cancel public events'].shift())
events = events.apply(lambda x: locate_changes(x, 'S3_Cancel public events'), axis = 1)
events = events.drop(columns = ['S3_Cancel public events']).dropna()
events_text_measures = {0: 'No measures on public events', 2: 'Cancel public events'}
events = events.replace(events_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Public transport measures

In [55]:
transport = gov_selected_non_eco_measures[['CountryName', 'Date', 'S4_Close public transport']]
transport['value'] = transport['S4_Close public transport'].eq(transport['S4_Close public transport'].shift())
transport = transport.apply(lambda x: locate_changes(x, 'S4_Close public transport'), axis = 1)
transport = transport.drop(columns = ['S4_Close public transport']).dropna()
transport_text_measures = {0: 'No measures on public transport', 2: 'Close public transport'}
transport = transport.replace(transport_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Information campaign measures

In [56]:
campaign = gov_selected_non_eco_measures[['CountryName', 'Date', 'S5_Public information campaigns']]
campaign['value'] = campaign['S5_Public information campaigns'].eq(campaign['S5_Public information campaigns'].shift())
campaign = campaign.apply(lambda x: locate_changes(x, 'S5_Public information campaigns'), axis = 1)
campaign = campaign.drop(columns = ['S5_Public information campaigns']).dropna()
campaign_text_measures = {0: 'No COVID-19 information campaign', 1: 'COVID-19 public information campaign'}
campaign = campaign.replace(campaign_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Internal movement measures

In [57]:
internal_movement = gov_selected_non_eco_measures[['CountryName', 'Date', 'S6_Restrictions on internal movement']]
internal_movement['value'] = internal_movement['S6_Restrictions on internal movement'].eq(internal_movement['S6_Restrictions on internal movement'].shift())
internal_movement = internal_movement.apply(lambda x: locate_changes(x, 'S6_Restrictions on internal movement'), axis = 1)
internal_movement = internal_movement.drop(columns = ['S6_Restrictions on internal movement']).dropna()
internal_movement_text_measures = {0: 'No restriction on internal movement', 2: 'Rectriction on movement'}
internal_movement = internal_movement.replace(internal_movement_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Contact tracing measures

In [58]:
tracing = gov_selected_non_eco_measures[['CountryName', 'Date', 'S13_Contact tracing']]
tracing['value'] = tracing['S13_Contact tracing'].eq(tracing['S13_Contact tracing'].shift())
tracing = tracing.apply(lambda x: locate_changes(x, 'S13_Contact tracing'), axis = 1)
tracing = tracing.drop(columns = ['S13_Contact tracing']).dropna()
tracing_text_measures = {0: 'No contact tracing', 2: 'Contact tracing used'}
tracing = tracing.replace(tracing_text_measures).groupby('CountryName').apply(lambda x: delete_first_row(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Concatenate all the values

In [59]:
overall_measures = school.append(workplace, ignore_index=True).append(events, ignore_index=True).append(transport, ignore_index=True).append(campaign, ignore_index=True).append(internal_movement, ignore_index=True).append(tracing, ignore_index=True)
overall_measures.head()

Unnamed: 0,CountryName,Date,value
0,Belgium,20200101,Schools open
1,Belgium,20200314,School closed
2,Germany,20200101,Schools open
3,Germany,20200226,School closed
4,France,20200101,Schools open


In [60]:
germany_measures = overall_measures[overall_measures['CountryName'] == 'Germany']
usa_measures = overall_measures[overall_measures['CountryName'] == 'France']
france_measures = overall_measures[overall_measures['CountryName'] == 'United States']
gelgium_measures = overall_measures[overall_measures['CountryName'] == 'Belgium']

# Detailed Datasets