In [49]:
import pandas as pd
import numpy as np
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import os

### Making date format consistent

In [68]:
# Load the NPI policy records from the project's data directory.
npisDf = pd.read_csv("data/complete_npis_inherited_policies.csv")
# Removing Others (only 2 instances). The explicit .copy() makes the filtered
# frame independent of the one read from disk, so assigning a column to it
# later does not raise SettingWithCopyWarning.
npisDf = npisDf[npisDf["npi"] != 'Other'].copy()
def clean_start_date(date):
    """Parse a raw ``start_date`` cell into a timestamp.

    The expected text is ``month/day/year``, optionally surrounded by
    whitespace and/or carrying ``*`` markers, which are stripped. Two-digit
    years are read as 20xx.

    Parameters
    ----------
    date : str or None
        Raw cell value. ``None``, a non-string missing value (NaN/NaT) and the
        case-insensitive placeholders ``''``, ``'nan'`` and ``'none in place'``
        all mean "no start date".

    Returns
    -------
    pandas.Timestamp or None
        The parsed date, or ``None`` when the cell holds no date.
        ``pandas.Timestamp`` subclasses ``datetime.datetime``, so callers that
        expect a ``datetime`` keep working.

    Raises
    ------
    ValueError
        If the cleaned text is not three ``/``-separated integers or does not
        form a valid calendar date.
    """
    # Real missing values (not their string form) have no .strip(); bail out early.
    if date is None or (not isinstance(date, str) and pd.isnull(date)):
        return None

    date = str(date).strip().lower()
    if date in ('', 'nan', 'none in place'):
        return None

    # Some cells carry '*' markers next to the date; they are not part of it.
    date = date.replace("*", "")

    month, day, year = (int(part) for part in date.split("/"))
    # Expand any two-digit year (e.g. 20 -> 2020), not just the literal 20.
    if year < 100:
        year += 2000

    # pd.datetime was removed from pandas; pd.Timestamp is the supported equivalent.
    return pd.Timestamp(year=year, month=month, day=day)
# Cast every start_date cell to text, then parse it with clean_start_date.
npisDf["start_date"] = npisDf["start_date"].astype('str').apply(clean_start_date)
npisDf.head()

Unnamed: 0,fip_code,county,state,npi,start_date,end_date,citation,note
0,1,,Alabama,gathering_size_10_0,2020-03-28,,http://www.alabamapublichealth.gov/news/2020/0...,max 10 people permitted if maintain 6 feet
1,1,,Alabama,gathering_size_25_to_11,2020-03-19,,http://www.alabamapublichealth.gov/news/2020/0...,
2,1,,Alabama,non-essential_services_closure,2020-03-28,,http://www.alabamapublichealth.gov/news/2020/0...,T
3,1,,Alabama,shelter_in_place,2020-04-04,,https://governor.alabama.gov/assets/2020/04/Fi...,outside rec is okay with proper social distan...
4,1,,Alabama,closing_of_public_venues,2020-03-28,,http://www.alabamapublichealth.gov/news/2020/0...,3/19/20 order only for restaurants to do take ...


### Pivoting the table by fip_code

In [117]:
# Reshape to one row per fip_code with one column per npi, each cell holding
# that policy's start_date. Passing `values` as a scalar produces flat column
# labels directly, so there is no extra column level to drop afterwards.
npisLookupDf = npisDf.pivot(index='fip_code', columns='npi', values='start_date').reset_index()

# Fixed column order for the lookup table; selecting with it raises KeyError
# if any listed npi is absent from the data.
colsOrder = ['fip_code', 'gathering_size_500_to_101', 'gathering_size_100_to_26', 'gathering_size_25_to_11', 'gathering_size_10_0',
                'school_closure', 'closing_of_public_venues', 'non-essential_services_closure',
                 'social_distancing', 'shelter_in_place', 'religious_gatherings_banned', 'lockdown']
npisLookupDf = npisLookupDf[colsOrder]

# Report the table size, then preview the first rows as the cell output.
print(npisLookupDf.shape)
npisLookupDf.head()

(384, 12)


npi,fip_code,gathering_size_500_to_101,gathering_size_100_to_26,gathering_size_25_to_11,gathering_size_10_0,school_closure,closing_of_public_venues,non-essential_services_closure,social_distancing,shelter_in_place,religious_gatherings_banned,lockdown
0,1,2020-03-13,2020-03-16,2020-03-19,2020-03-28,2020-03-20,2020-03-28,2020-03-28,2020-04-03,2020-04-04,2020-04-04,NaT
1,2,NaT,NaT,NaT,2020-03-24,2020-03-16,2020-03-18,2020-03-28,2020-03-28,2020-03-28,2020-03-24,NaT
2,4,NaT,NaT,NaT,NaT,2020-03-15,2020-03-31,2020-03-31,2020-03-31,2020-03-31,NaT,NaT
3,5,NaT,NaT,NaT,2020-03-27,2020-03-19,2020-03-20,2020-04-06,2020-03-27,NaT,NaT,NaT
4,6,2020-03-11,NaT,NaT,2020-03-16,2020-03-19,2020-03-19,2020-03-19,2020-03-19,2020-03-19,2020-03-19,NaT


In [131]:
def inherit_gathering_int(row):
    """Fill missing gathering-size dates from the next smaller size tier.

    The tiers are walked from the smallest gathering size upwards:
    ``25_to_11`` inherits from ``10_0``, ``100_to_26`` from ``25_to_11`` and
    ``500_to_101`` from ``100_to_26``. A tier is only filled when its own
    value is null, so a value can cascade through several empty tiers while
    existing values are never overwritten.

    Parameters
    ----------
    row : pandas.Series
        One row holding the four ``gathering_size_*`` entries.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with the gaps filled; the input is left untouched.
    """
    # DataFrame.apply does not support functions that mutate the object they
    # are given, so all edits happen on a private copy.
    row = row.copy()

    # (tier to fill, smaller tier it inherits from); order matters so that a
    # filled value can propagate further up the chain.
    inheritance_chain = (
        ('gathering_size_25_to_11', 'gathering_size_10_0'),
        ('gathering_size_100_to_26', 'gathering_size_25_to_11'),
        ('gathering_size_500_to_101', 'gathering_size_100_to_26'),
    )
    for target, source in inheritance_chain:
        if pd.isnull(row[target]):
            row[target] = row[source]
    return row
# Run the gathering-size inheritance on each row (axis=1 passes rows, not columns).
npisLookupDf = npisLookupDf.apply(inherit_gathering_int, axis=1)
npisLookupDf.head()

npi,fip_code,gathering_size_500_to_101,gathering_size_100_to_26,gathering_size_25_to_11,gathering_size_10_0,school_closure,closing_of_public_venues,non-essential_services_closure,social_distancing,shelter_in_place,religious_gatherings_banned,lockdown
0,1,2020-03-13,2020-03-16,2020-03-19,2020-03-28,2020-03-20,2020-03-28,2020-03-28,2020-04-03,2020-04-04,2020-04-04,NaT
1,2,2020-03-24,2020-03-24,2020-03-24,2020-03-24,2020-03-16,2020-03-18,2020-03-28,2020-03-28,2020-03-28,2020-03-24,NaT
2,4,NaT,NaT,NaT,NaT,2020-03-15,2020-03-31,2020-03-31,2020-03-31,2020-03-31,NaT,NaT
3,5,2020-03-27,2020-03-27,2020-03-27,2020-03-27,2020-03-19,2020-03-20,2020-04-06,2020-03-27,NaT,NaT,NaT
4,6,2020-03-11,2020-03-16,2020-03-16,2020-03-16,2020-03-19,2020-03-19,2020-03-19,2020-03-19,2020-03-19,2020-03-19,NaT


In [132]:
# Write the lookup table to disk without the positional row index.
npisLookupDf.to_csv(
    path_or_buf="data/npis_lookup.csv",
    index=False,
)

In [133]:
# Spot-check the lookup row for fip_code 53033.
npisLookupDf.loc[npisLookupDf["fip_code"] == 53033]

npi,fip_code,gathering_size_500_to_101,gathering_size_100_to_26,gathering_size_25_to_11,gathering_size_10_0,school_closure,closing_of_public_venues,non-essential_services_closure,social_distancing,shelter_in_place,religious_gatherings_banned,lockdown
372,53033,2020-03-11,2020-03-16,NaT,NaT,2020-03-12,2020-03-16,2020-03-16,2020-03-11,2020-03-23,2020-03-16,NaT


In [129]:
# Column-wise minimum: the smallest value found in each column.
npisLookupDf.min(axis=0)

npi
fip_code                                            1
gathering_size_500_to_101         2020-03-11 00:00:00
gathering_size_100_to_26          2020-03-12 00:00:00
gathering_size_25_to_11           2020-03-16 00:00:00
gathering_size_10_0               2020-03-13 00:00:00
school_closure                    2020-03-12 00:00:00
closing_of_public_venues          2020-03-13 00:00:00
non-essential_services_closure    2020-03-12 00:00:00
social_distancing                 2020-03-10 00:00:00
shelter_in_place                  2020-03-16 00:00:00
religious_gatherings_banned       2020-03-14 00:00:00
lockdown                          2020-03-15 00:00:00
dtype: object