In [2]:
import dask.dataframe as dd
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from os import path, listdir, walk
from tqdm.notebook import tqdm
import plotly.express as px
import plotly.graph_objects as go
from sklearn import manifold
from nltk.tokenize import WordPunctTokenizer
from collections import Counter
punct_tokenizer = WordPunctTokenizer()

In [3]:
policies_dataset = pd.read_csv("./dataset/Country_Policy.csv", header = None)

In [4]:
policies_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,,,,,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,,,,,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,,,,,,,,
3,162,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,,,,,,,,
4,495,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1701,,,,,,,,,,,...,,,,,,,,,,
1702,,,,,,,,,,,...,,,,,,,,,,
1703,,,,,,,,,,,...,,,,,,,,,,
1704,,,,,,,,,,,...,,,,,,,,,,


In [5]:
policies_dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,,,,,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,,,,,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,,,,,,,,
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,,,,,,,,
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,,,,,,,,


In [6]:
policies_dataset.rename(columns = {2:'Country'})


Unnamed: 0,0,1,Country,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,,,,,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,,,,,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,,,,,,,,
3,162,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,,,,,,,,
4,495,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1701,,,,,,,,,,,...,,,,,,,,,,
1702,,,,,,,,,,,...,,,,,,,,,,
1703,,,,,,,,,,,...,,,,,,,,,,
1704,,,,,,,,,,,...,,,,,,,,,,


In [7]:
policies_dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,,,,,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,,,,,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,,,,,,,,
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,,,,,,,,
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,,,,,,,,


In [8]:
policies_dataset = policies_dataset.rename(columns = {0: 'ID', 1: 'Applies To', 2:'Country', 3: 'Starting Date', 
                                                      4: 'Ending Date', 5: 'Description of Measures Implemented', 6: 'Exceptions', 7: 'Implementing City', 
                                                      8: 'Implementing State/Province', 9: 'Keywords', 10: 'Quantity', 
                                                      11 : 'Source', 12: 'Target city', 13: 'Target country', 14: 'Target region', 
                                                      15: 'Target state'})


In [9]:
policies_dataset.head()

Unnamed: 0,ID,Applies To,Country,Starting Date,Ending Date,Description of Measures Implemented,Exceptions,Implementing City,Implementing State/Province,Keywords,...,16,17,18,19,20,21,22,23,24,25
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,,,,,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,,,,,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,,,,,,,,
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,,,,,,,,
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,,,,,,,,


In [10]:
policies_dataset = policies_dataset.drop(columns = {16,17,18,19,20,21,22,23,24,25})

In [11]:
policies_dataset.head()

Unnamed: 0,ID,Applies To,Country,Starting Date,Ending Date,Description of Measures Implemented,Exceptions,Implementing City,Implementing State/Province,Keywords,Quantity,Source,Target city,Target country,Target region,Target state
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",,https://www.newvision.co.ug/new_vision/news/15...,,,,
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,


In [12]:
policies_dataset['Country'] = policies_dataset['Country'].apply(lambda row: str(row)).apply(lambda row: 'Czech Republic' if 'Czechia' in row else row)
policies_dataset['Country'] = policies_dataset['Country'].apply(lambda row: str(row)).apply(lambda row: 'United States' if 'US' in row else row)

In [13]:
policies_dataset.head()

Unnamed: 0,ID,Applies To,Country,Starting Date,Ending Date,Description of Measures Implemented,Exceptions,Implementing City,Implementing State/Province,Keywords,Quantity,Source,Target city,Target country,Target region,Target state
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",,https://www.newvision.co.ug/new_vision/news/15...,,,,
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,


In [14]:
policies_dataset['Country']

0       Afghanistan
1       Afghanistan
2       Afghanistan
3           Albania
4           Albania
           ...     
1701            nan
1702            nan
1703            nan
1704            nan
1705            nan
Name: Country, Length: 1706, dtype: object

In [15]:
print(policies_dataset['Country'])

0       Afghanistan
1       Afghanistan
2       Afghanistan
3           Albania
4           Albania
           ...     
1701            nan
1702            nan
1703            nan
1704            nan
1705            nan
Name: Country, Length: 1706, dtype: object


In [16]:
print(f"total examples {len(policies_dataset)}")

total examples 1706


In [17]:
policies_dataset.columns

Index(['ID', 'Applies To', 'Country', 'Starting Date', 'Ending Date',
       'Description of Measures Implemented', 'Exceptions',
       'Implementing City', 'Implementing State/Province', 'Keywords',
       'Quantity', 'Source', 'Target city', 'Target country', 'Target region',
       'Target state'],
      dtype='object')

In [18]:
print(f"measures description found: {len(policies_dataset[policies_dataset['Description of Measures Implemented'].notnull()])}")

measures description found: 1641


In [19]:
print(f"measures keywords found: {len(policies_dataset[policies_dataset['Keywords'].notnull()])}")

measures keywords found: 1616


In [20]:
print(f"measures with date found: {len(policies_dataset[policies_dataset['Starting Date'].notnull()])}")

measures with date found: 1640


In [21]:
def check_for_mask(text):
    word_list = ['mask', 'masks']
    text = str(text)
    token = set(punct_tokenizer.tokenize(text.lower()))
    match = set(word_list).intersection(token)
    if match:
        return True
    else:
        return False

In [22]:
policies_dataset['mask'] = policies_dataset['Description of Measures Implemented'].apply(check_for_mask)

In [23]:
mask_country_count = policies_dataset[policies_dataset['mask']==True]['Country'].value_counts()

In [24]:
fig = go.FigureWidget(data = px.bar(mask_country_count, x = mask_country_count.index, y = mask_country_count.values))

In [25]:
print(mask_country_count)

South Korea       7
Hong Kong         6
Taiwan            3
China             3
Czech Republic    3
Nepal             3
Belgium           2
Slovakia          2
Japan             2
Canada            2
Italy             2
Singapore         1
Russia            1
Greece            1
Egypt             1
Sweden            1
United States     1
India             1
Bulgaria          1
Croatia           1
Name: Country, dtype: int64


In [26]:
print(mask_country_count.values)

[7 6 3 3 3 3 2 2 2 2 2 1 1 1 1 1 1 1 1 1]


In [27]:
print(mask_country_count.index)

Index(['South Korea', 'Hong Kong', 'Taiwan', 'China', 'Czech Republic',
       'Nepal', 'Belgium', 'Slovakia', 'Japan', 'Canada', 'Italy', 'Singapore',
       'Russia', 'Greece', 'Egypt', 'Sweden', 'United States', 'India',
       'Bulgaria', 'Croatia'],
      dtype='object')


In [28]:
print(mask_country_count.values)

[7 6 3 3 3 3 2 2 2 2 2 1 1 1 1 1 1 1 1 1]


In [29]:
fig.update_layout(title = 'Countries with mask mentioned in measures dataset',
                  xaxis_title = 'Country Name', yaxis_title = 'Number of events in data')
fig

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'hovertemplate': 'index=%{x}<br>y=%{y}<ex…

In [30]:
keywords = [['mask','masks'], ['lockdown'], ['social distance', 'social distancing'], ['quarantine']]

In [31]:
 def check_for_keyword(text, keyword):
    text = str(text)
    token = set(punct_tokenizer.tokenize(text.lower()))
    match = set(keyword).intersection(token)
    if match:
        return True
    else:
        return False
for keyword in keywords:
    policies_dataset[keyword[0]] = policies_dataset['Keywords'].apply(lambda text:check_for_keyword(text,keyword))
    policies_dataset[f"{keyword[0]}_implemented"] = policies_dataset['Description of Measures Implemented'].apply(lambda text:check_for_keyword(text,keyword))


In [32]:
policies_dataset.head()

Unnamed: 0,ID,Applies To,Country,Starting Date,Ending Date,Description of Measures Implemented,Exceptions,Implementing City,Implementing State/Province,Keywords,...,Target region,Target state,mask,mask_implemented,lockdown,lockdown_implemented,social distance,social distance_implemented,quarantine,quarantine_implemented
0,,,Afghanistan,2/24/20,,"On 24 February, Afghanistan confirmed the firs...",,,,first case,...,,,False,False,False,False,False,False,False,False
1,,,Afghanistan,3/14/20,,"President of Afghanistan, Ashraf Ghani, told t...",,,,public announcement,...,,,False,False,False,False,False,False,False,False
2,,,Afghanistan,3/18/20,,"On 18 March, the Ministry of Interior Affairs ...",,,,"social distancing, sports cancellation, weddin...",...,,,False,False,False,False,False,False,False,False
3,162.0,,Albania,3/16/20,3/26/20,Albania will close schools and ban public gath...,,,,"first case, school closure",...,,,False,False,False,False,False,False,False,False
4,495.0,,Albania,3/8/20,,"On 8 March 2020, Albania confirmed its first t...",,,,case reporting,...,,,False,False,False,False,False,False,False,False


In [33]:
policies_dataset.to_csv(r'./dataset/Policies_Dataset.csv')

In [34]:
covid_cases = pd.read_csv("./dataset/TimeSeriesWorldCovidConfirm.csv")

In [35]:
covid_cases.head()

Unnamed: 0,"Province/State, Country/Region",1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,6/7/20,6/8/20,6/9/20,6/10/20,6/11/20,6/12/20,6/13/20,6/14/20,6/15/20,6/16/20
0,"#adm1+name, #country+name",,,,,,,,,,...,,,,,,,,,,
1,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,791.0,575.0,542.0,683.0,748.0,656.0,556.0,664.0,761.0,783.0
2,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14.0,17.0,36.0,42.0,44.0,31.0,48.0,57.0,69.0,82.0
3,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,104.0,111.0,117.0,102.0,105.0,109.0,112.0,109.0,112.0,116.0
4,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [37]:
covid_cases = covid_cases.dropna()

In [38]:
covid_cases.head()

Unnamed: 0,"Province/State, Country/Region",1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,6/7/20,6/8/20,6/9/20,6/10/20,6/11/20,6/12/20,6/13/20,6/14/20,6/15/20,6/16/20
1,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,791.0,575.0,542.0,683.0,748.0,656.0,556.0,664.0,761.0,783.0
2,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14.0,17.0,36.0,42.0,44.0,31.0,48.0,57.0,69.0,82.0
3,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,104.0,111.0,117.0,102.0,105.0,109.0,112.0,109.0,112.0,116.0
4,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
5,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,1.0,4.0,17.0,5.0,12.0,8.0,2.0,2.0,6.0
