Countermeasures simulations
-----------------------------------

RNN-based approach to simulate how countermeasures, and the timing of their introduction, affect deaths per 100k inhabitants.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Display the installed TensorFlow version so the environment is recorded with the notebook.
tf.__version__

'2.1.0'

In [3]:
# Raise pandas' display limits: up to 200 characters per cell and 100 columns per frame.
pd.set_option("display.max_colwidth", 200)
pd.set_option("display.max_columns", 100)

#### Data sources

* The countermeasures data is made available as part of the Johns Hopkins [Containment Measures Database](http://epidemicforecasting.org/containment).
* The Oxford COVID-19 Government Response Tracker is made available as part of the [OxCGRT](https://www.bsg.ox.ac.uk/research/research-projects/oxford-covid-19-government-response-tracker) project.
* The country population data was gathered from the [World Bank DataBank](https://databank.worldbank.org/reports.aspx?source=2&series=SP.POP.TOTL&country=#), with the latest data available for 2018.

In [4]:
# Load the containment-measures export and convert its "Date" strings (YYYY-MM-DD)
# into datetime.date objects. The whole column is parsed in one vectorised
# pd.to_datetime call instead of a Python-level lambda per row.
johnshopkins_df = pd.read_csv("data/countermeasures_db_johnshopkins_2020_03_30.csv")
johnshopkins_df["Date"] = pd.to_datetime(johnshopkins_df["Date"], format="%Y-%m-%d").dt.date

In [5]:
# Load the Oxford government-response tracker export. The "Date" values are cast to
# strings first (as the original per-row str(x) did) and then parsed in a single
# vectorised pd.to_datetime call before being reduced to datetime.date objects.
oxford_grt_df = pd.read_csv("data/oxford_uni_response_tracker_08042020.csv")
oxford_grt_df["Date"] = pd.to_datetime(oxford_grt_df["Date"].astype(str), format="%Y-%m-%d").dt.date

In [6]:
# Keep only the first row seen for each (CountryName, StringencyIndex) pair;
# every later row that repeats an already-seen pair is dropped.
grt_df = oxford_grt_df.drop_duplicates(
    subset=["CountryName", "StringencyIndex"],
    keep="first",
)

In [7]:
# Outer-join the two measure sources on date and country name, so rows that exist
# in only one of them are retained (with NaN in the other source's columns).
measures_df = johnshopkins_df.merge(
    grt_df,
    how="outer",
    left_on=["Date", "Country"],
    right_on=["Date", "CountryName"],
)

In [8]:
# Read the 2018 population table, keep only the country name and the 2018 value,
# and give the value column a friendlier name.
population_df = (
    pd.read_csv("data/wb_country_populations_2018.csv")[["Country Name", "2018 [YR2018]"]]
    .rename(columns={"2018 [YR2018]": "2018_Country_Population"})
)

In [9]:
# Attach the population figures and zero-fill every gap left by the outer joins.
base_df = (
    measures_df
    .merge(population_df, how="outer", left_on="Country", right_on="Country Name")
    .fillna(0)
)
# Drop rows whose population is the literal ".." string and rows that have no
# "Country" value (these appear as 0 after the fill above).
base_df = base_df.loc[
    (base_df["2018_Country_Population"] != "..") & (base_df["Country"] != 0)
]

In [10]:
# Preview the first rows of the merged and filtered frame.
base_df.head()

Unnamed: 0.1,Unnamed: 0,Symptomatic isolation - targeted,Symptomatic isolation - blanket,Asymptomatic isolation - targeted,Asymptomatic isolation - blanket,Domestic travel restriction,International travel restriction,Testing,Contact tracing,Mask wearing,Hand washing,Gatherings banned,Healthcare specialisation,Public education and incentives,Assisting people to stay home,Public cleaning,Miscellaneous hygiene measures,Public interaction reduction,Nonessential business suspension,School closure,Activity cancellation,Resumption,Diagnostic criteria loosened,Diagnostic criteria tightened,Testing criteria,Date,Country,Confirmed Cases,Deaths,CountryName,CountryCode,S1_School closing,S1_IsGeneral,S1_Notes,S2_Workplace closing,S2_IsGeneral,S2_Notes,S3_Cancel public events,S3_IsGeneral,S3_Notes,S4_Close public transport,S4_IsGeneral,S4_Notes,S5_Public information campaigns,S5_IsGeneral,S5_Notes,S6_Restrictions on internal movement,S6_IsGeneral,S6_Notes,S7_International travel controls,S7_Notes,S8_Fiscal measures,S8_Notes,S9_Monetary measures,S9_Notes,S10_Emergency investment in health care,S10_Notes,S11_Investment in Vaccines,S11_Notes,S12_Testing framework,S12_Notes,S13_Contact tracing,S13_Notes,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyIndexForDisplay,Unnamed: 39,Country Name,2018_Country_Population
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-23,Austria,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,Austria,8847037
1,86.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-24,Austria,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,Austria,8847037
2,172.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-25,Austria,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,Austria,8847037
3,258.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-26,Austria,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,Austria,8847037
4,344.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-27,Austria,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,Austria,8847037


In [11]:
# List the distinct countries that remain in base_df.
print(base_df["Country"].unique())

['Austria' 'Germany' 'United Kingdom' 'Vietnam' 'South Korea' 'Singapore'
 'Israel' 'Japan' 'Sweden' 'San Marino' 'Slovenia' 'Canada' 'Taiwan'
 'Macau' 'Hong Kong' 'China' 'Thailand' 'Italy' 'Czechia' 'Australia'
 'Trinidad and Tobago' 'Qatar' 'New Zealand' 'Colombia' 'Romania' 'France'
 'Portugal' 'Spain' 'Belgium' 'Luxembourg' 'Albania' 'Andorra'
 'Azerbaijan' 'Belarus' 'Bosnia and Herzegovina' 'Bulgaria' 'Denmark'
 'Estonia' 'Cyprus' 'Croatia' 'Finland' 'Georgia' 'Hungary' 'Latvia'
 'Lithuania' 'Greece' 'Moldova' 'Malta' 'Monaco' 'Netherlands' 'Iceland'
 'Ireland' 'Kosovo' 'Kazakhstan' 'Poland' 'Turkey' 'Ukraine' 'Slovakia'
 'Serbia' 'Switzerland' 'Norway' 'Montenegro' 'Iran' 'Liechtenstein'
 'Russia' 'Mexico' 'Egypt' 'Malaysia' 'Nepal' 'Afghanistan' 'Iraq'
 'Philippines' 'Kuwait' 'South Africa' 'Armenia' 'Pakistan' 'Brazil'
 'Costa Rica' 'Panama' 'India' 'Bahrain' 'United Arab Emirates'
 'Kyrgyzstan' 'Indonesia' 'Namibia' 'Uganda']


In [12]:
# Show all column names; the countermeasure feature list below is derived from them.
base_df.columns

Index(['Unnamed: 0', 'Symptomatic isolation - targeted',
       'Symptomatic isolation - blanket', 'Asymptomatic isolation - targeted',
       'Asymptomatic isolation - blanket', 'Domestic travel restriction',
       'International travel restriction', 'Testing', 'Contact tracing',
       'Mask wearing', 'Hand washing', 'Gatherings banned',
       'Healthcare specialisation', 'Public education and incentives',
       'Assisting people to stay home', 'Public cleaning',
       'Miscellaneous hygiene measures', 'Public interaction reduction',
       'Nonessential business suspension', 'School closure',
       'Activity cancellation', 'Resumption', 'Diagnostic criteria loosened',
       'Diagnostic criteria tightened', 'Testing criteria', 'Date', 'Country',
       'Confirmed Cases', 'Deaths', 'CountryName', 'CountryCode',
       'S1_School closing', 'S1_IsGeneral', 'S1_Notes', 'S2_Workplace closing',
       'S2_IsGeneral', 'S2_Notes', 'S3_Cancel public events', 'S3_IsGeneral',
       'S3_N

In [13]:
# Columns that must NOT be treated as countermeasure features: identifiers, dates,
# case/death counts, stringency indices, population and derived targets.
# ('Country Name' was listed twice in the original; the duplicate is removed.)
other_cm_cols = ['Unnamed: 0', 'Resumption', 'Diagnostic criteria tightened', 'Diagnostic criteria loosened',
                 'Testing criteria', 'Date', 'Country', 'Confirmed Cases', 'Deaths', 'Country Name', 'CountryName',
                 'CountryCode', 'ConfirmedCases', 'ConfirmedDeaths', 'StringencyIndex', 'StringencyIndexForDisplay',
                 'Unnamed: 39', '2018_Country_Population', 'Deaths_per_100k']

In [14]:
# Every column that is not in other_cm_cols and does not end in "_Notes" or
# "_IsGeneral" is treated as a countermeasure feature.
countermeasures = [
    column
    for column in base_df.columns
    if column not in other_cm_cols and not column.endswith(("_Notes", "_IsGeneral"))
]

In [15]:
# Working frame: the countermeasure features plus the columns needed for the target.
df = base_df[[*countermeasures, "Date", "Country", "Deaths", "2018_Country_Population"]].fillna(0)

In [16]:
# Binarise the countermeasure columns: any value above 0 becomes 1.0, everything else is kept.
df[countermeasures] = df[countermeasures].mask(df[countermeasures].gt(0), 1.0)

## Simulation of the effect and timing of countermeasures

Exploration of the impact of individual countermeasures on reported deaths per 100,000 inhabitants in a single country.

In [17]:
# Simulation parameters.
country = "Germany"  # country excluded from the training data and used for the simulation
treatment = "Nonessential business suspension"  # countermeasure column whose effect is explored
days_after_first_case = 7  # NOTE(review): not used in the visible cells — presumably the delay in days before the treatment starts; confirm

In [18]:
# Rows of the simulated country only, in chronological order.
simulation_df = df.loc[df["Country"].eq(country)].sort_values("Date")

In [19]:
# Everything except the simulated country is available for training.
train_df = df.loc[df["Country"].ne(country)]
train_countries = [name for name in df["Country"].unique() if name != country]

In [20]:
def create_train_and_validation_data(train_df, train_countries, val_fraction=0.2, seed=42):
    """Split ``train_df`` by country into a training and a validation frame.

    Whole countries are held out, so the validation rows come from countries
    that do not appear in the returned training frame.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with a ``"Country"`` column.
    train_countries : iterable
        Country names eligible for the split. Rows of ``train_df`` whose
        country is not listed here are excluded from both outputs.
    val_fraction : float, default 0.2
        Share of ``train_countries`` to hold out for validation; must satisfy
        ``0 <= val_fraction < 1``.
    seed : int, default 42
        Seed for the country shuffle, so the split is reproducible.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(train_part, val_df)``.

    Raises
    ------
    ValueError
        If ``val_fraction`` is outside ``[0, 1)``.
    """
    # The original body returned an undefined name `val_df`; the split is built here.
    if not 0.0 <= val_fraction < 1.0:
        raise ValueError("val_fraction must be in the interval [0, 1)")

    # De-duplicate while preserving the caller's order.
    countries = list(dict.fromkeys(train_countries))

    n_val = int(round(len(countries) * val_fraction))
    # Always leave at least one country for training.
    n_val = min(n_val, max(len(countries) - 1, 0))

    # Shuffle positions rather than the values, so the country objects keep their type.
    order = np.random.RandomState(seed).permutation(len(countries))
    val_countries = [countries[i] for i in order[:n_val]]

    is_eligible = train_df["Country"].isin(countries)
    is_val = train_df["Country"].isin(val_countries)

    val_df = train_df[is_val]
    train_part = train_df[is_eligible & ~is_val]
    return train_part, val_df

In [21]:
# Day-over-day change of the "Deaths" column; the first row has no predecessor and becomes NaN.
simulation_df["Reported Deaths"] = simulation_df["Deaths"].diff()

In [22]:
# Normalise deaths by population: deaths / (population / 100,000).
# The population column holds strings, so it is cast to float first.
simulation_df["Deaths_per_100k"] = simulation_df["Deaths"].div(
    simulation_df["2018_Country_Population"].astype(float).div(100000.0)
)

In [23]:
# Replace remaining NaNs with 0 (diff() leaves the first "Reported Deaths" row undefined).
simulation_df = simulation_df.fillna(0)

In [24]:
# Show the death figures from the first day the treatment is active onwards.
# The treatment column is binary, so idxmax() returns the index label of the first 1.0
# (or of the very first row if the treatment was never applied).
# idxmax() takes no value to search for: the original `idxmax(1.0)` passed 1.0 as the
# `axis` argument, which newer pandas versions validate and reject.
simulation_df.loc[simulation_df[treatment].idxmax():, ["Date", "Reported Deaths", "Deaths_per_100k"]]

Unnamed: 0,Date,Reported Deaths,Deaths_per_100k
118,2020-03-14,2.0,0.010853
119,2020-03-15,2.0,0.013265
120,2020-03-16,6.0,0.0205
121,2020-03-17,7.0,0.028941
122,2020-03-18,4.0,0.033764
123,2020-03-19,16.0,0.053058
124,2020-03-20,23.0,0.080793
125,2020-03-21,17.0,0.101293
126,2020-03-22,10.0,0.113351
127,2020-03-23,29.0,0.148322


In [25]:
# Preview the simulation frame, including the derived "Reported Deaths" and "Deaths_per_100k" columns.
simulation_df.head()

Unnamed: 0,Symptomatic isolation - targeted,Symptomatic isolation - blanket,Asymptomatic isolation - targeted,Asymptomatic isolation - blanket,Domestic travel restriction,International travel restriction,Testing,Contact tracing,Mask wearing,Hand washing,Gatherings banned,Healthcare specialisation,Public education and incentives,Assisting people to stay home,Public cleaning,Miscellaneous hygiene measures,Public interaction reduction,Nonessential business suspension,School closure,Activity cancellation,S1_School closing,S2_Workplace closing,S3_Cancel public events,S4_Close public transport,S5_Public information campaigns,S6_Restrictions on internal movement,S7_International travel controls,S8_Fiscal measures,S9_Monetary measures,S10_Emergency investment in health care,S11_Investment in Vaccines,S12_Testing framework,S13_Contact tracing,Date,Country,Deaths,2018_Country_Population,Reported Deaths,Deaths_per_100k
67,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-23,Germany,0.0,82927922,0.0,0.0
68,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-24,Germany,0.0,82927922,0.0,0.0
69,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-25,Germany,0.0,82927922,0.0,0.0
70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-26,Germany,0.0,82927922,0.0,0.0
71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-27,Germany,0.0,82927922,0.0,0.0


### RNN GRU model