## Forecasting COVID-19 Cases
### By: Jagandeep Singh
#### This notebook contains the extraction of useful information from the state policy updates dataset.

In [1]:
import pandas as pd
import numpy as np
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('state_policy_updates_20200925_0346.csv')

In [3]:
df.shape

(2501, 10)

In [4]:
df_state = df[df.policy_level == 'state']

In [5]:
df_state.policy_type.value_counts()

Shelter in Place                                                        101
Food and Drink                                                           98
Non-Essential Businesses                                                 96
Gyms                                                                     94
State of Emergency                                                       51
Modify Medicaid Requirements With 1135 Waivers Date Of CMS Approval      51
Childcare (K-12)                                                         50
Allow Expand Medicaid Telehealth Coverage                                50
Entertainment                                                            50
Allow Audio Only Telehealth                                              48
Mandate Face Mask Use By All Individuals In Public Facing Businesses     45
Resumed Elective Medical Procedures                                      36
Mandate Face Mask Use By All Individuals In Public Spaces                35
Order Freezi

In [6]:
df_mask = df_state[(df_state.policy_type == 'Mandate Face Mask Use By All Individuals In Public Facing Businesses') | 
                  (df_state.policy_type == 'Mandate Face Mask Use By All Individuals In Public Spaces ')]

In [7]:
df_mask.head()

Unnamed: 0,state_id,county,fips_code,policy_level,date,policy_type,start_stop,comments,source,total_phases
61,AR,,,state,2020-05-11,Mandate Face Mask Use By All Individuals In Pu...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,
132,NM,,,state,2020-05-06,Mandate Face Mask Use By All Individuals In Pu...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,
156,DC,,,state,2020-04-15,Mandate Face Mask Use By All Individuals In Pu...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,
165,WY,,,state,2020-05-01,Mandate Face Mask Use By All Individuals In Pu...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,
239,CO,,,state,2020-04-23,Mandate Face Mask Use By All Individuals In Pu...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,


In [8]:
df_mask['date'] = pd.to_datetime(df_mask['date'])

In [9]:
to_drop = ['county', 'fips_code', 'policy_level', 'policy_type', 'start_stop', 'comments', 'source', 'total_phases']
df_mask.drop(to_drop, axis = 1, inplace = True)

In [10]:
df_mask.head()

Unnamed: 0,state_id,date
61,AR,2020-05-11
132,NM,2020-05-06
156,DC,2020-04-15
165,WY,2020-05-01
239,CO,2020-04-23


In [11]:
df_mask.set_index('date', inplace = True)

df_mask dataset have the state_id and start date of mask mandatory policy of each state

In [12]:
df_mask.head()

Unnamed: 0_level_0,state_id
date,Unnamed: 1_level_1
2020-05-11,AR
2020-05-06,NM
2020-04-15,DC
2020-05-01,WY
2020-04-23,CO


In [13]:
df_mask.head()

Unnamed: 0_level_0,state_id
date,Unnamed: 1_level_1
2020-05-11,AR
2020-05-06,NM
2020-04-15,DC
2020-05-01,WY
2020-04-23,CO


In [14]:
df_mask['num'] = np.arange(len(df_mask))+1

In [15]:
df_mask.drop(columns = ['state_id'], axis = 1, inplace = True)

In [16]:
df_mask.head()

Unnamed: 0_level_0,num
date,Unnamed: 1_level_1
2020-05-11,1
2020-05-06,2
2020-04-15,3
2020-05-01,4
2020-04-23,5


df_mask dataset have number of states with mandatory mask policy with number of states

In [17]:
df_mask.to_pickle('mand_mask.pkl')

### Counting how many states have shelter in place at any date

In [18]:
df_shelter = df_state[df_state.policy_type == 'Shelter in Place']

In [19]:
to_drop = ['county', 'fips_code', 'policy_level', 'policy_type', 'comments', 'source', 'total_phases']
df_shelter.drop(to_drop, axis = 1, inplace = True)

In [20]:
df_shelter['date'] = pd.to_datetime(df_shelter['date'])

In [21]:
df_shelter.set_index('date', inplace = True)

In [22]:
df_shelter.sort_index(inplace = True)

In [23]:
df_shelter.head()

Unnamed: 0_level_0,state_id,start_stop
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-15,PR,start
2020-03-19,CA,start
2020-03-20,GU,start
2020-03-21,NJ,start
2020-03-21,IL,start


In [24]:
df_shelter['num'] = np.where(df_shelter['start_stop'] == 'start', 1, -1).cumsum()

In [25]:
df_shelter.head()

Unnamed: 0_level_0,state_id,start_stop,num
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-15,PR,start,1
2020-03-19,CA,start,2
2020-03-20,GU,start,3
2020-03-21,NJ,start,4
2020-03-21,IL,start,5


In [26]:
df_shelter.drop(columns = ['state_id', 'start_stop'], axis = 1, inplace = True)

In [27]:
df_shelter.head()

Unnamed: 0_level_0,num
date,Unnamed: 1_level_1
2020-03-15,1
2020-03-19,2
2020-03-20,3
2020-03-21,4
2020-03-21,5


In [28]:
df_shelter.to_pickle('shelter.pkl')

### Counting how many states have Food and Drink restrictions at each date

In [30]:
df_food = df_state[df_state.policy_type == 'Food and Drink']

In [31]:
to_drop = ['county', 'fips_code', 'policy_level', 'policy_type', 'comments', 'source', 'total_phases']
df_food.drop(to_drop, axis = 1, inplace = True)

In [32]:
df_food['date'] = pd.to_datetime(df_food['date'])

In [33]:
df_food.set_index('date', inplace = True)

In [34]:
df_food.sort_index(inplace = True)

In [35]:
df_food.head()

Unnamed: 0_level_0,state_id,start_stop
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-15,OH,start
2020-03-16,IN,start
2020-03-16,DE,start
2020-03-16,NH,start
2020-03-16,MD,start


In [36]:
df_food['num'] = np.where(df_food['start_stop'] == 'start', 1, -1).cumsum()

In [37]:
df_food.head()

Unnamed: 0_level_0,state_id,start_stop,num
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-15,OH,start,1
2020-03-16,IN,start,2
2020-03-16,DE,start,3
2020-03-16,NH,start,4
2020-03-16,MD,start,5


In [38]:
df_food.drop(columns = ['state_id', 'start_stop'], axis = 1, inplace = True)

In [39]:
df_food.head()

Unnamed: 0_level_0,num
date,Unnamed: 1_level_1
2020-03-15,1
2020-03-16,2
2020-03-16,3
2020-03-16,4
2020-03-16,5


In [40]:
df_food.to_pickle('food.pkl')