# Draft Random Effects

In [1]:
import pandas as pd
import numpy as np

# Mandates

In [2]:
mask_mandates = pd.read_parquet('cdc_mask_mandates.parquet')
bar_closings = pd.read_parquet('cdc_bar_closings.parquet').rename(columns={'action':'bar_status'})
restaurant_closing = pd.read_parquet('cdc_restaurant_closings.parquet').rename(columns={'action':'restaurant_status'})
gathering_bans = pd.read_parquet('cdc_gathering_bans.parquet')
stay_at_home = pd.read_parquet('cdc_stay_at_home.parquet').rename(columns={'current_order_status':'stay_at_home'})

In [3]:
bar_closings['fips_code_text'] = bar_closings['state_fips'] + bar_closings['county_fips']
restaurant_closing['fips_code_text'] = restaurant_closing['state_fips'] + restaurant_closing['county_fips']
gathering_bans['fips_code_text'] = gathering_bans['state_fips'] + gathering_bans['county_fips']
stay_at_home['fips_code_text'] = stay_at_home['state_fips'] + stay_at_home['county_fips']


In [4]:
all_mandates = mask_mandates.merge(bar_closings[['fips_code_text','date','bar_status']]
                    ,left_on=['date','fips_code_text']
                    ,right_on=['date','fips_code_text'],
                              how='outer'
                   ).merge(restaurant_closing[['fips_code_text','date','restaurant_status']]
                    ,left_on=['date','fips_code_text']
                    ,right_on=['date','fips_code_text'],
                            how='outer'
                    ).merge(gathering_bans[['fips_code_text','date','general_gb_order_group']],
                             how='outer',
                   ).merge(stay_at_home[['fips_code_text','date','stay_at_home']],
                            how='outer'
                          ).sort_values(by=['fips_code_text','date'])


In [5]:
mandate_list = ['face_masks_required_in_public', 'bar_status',
       'restaurant_status', 'general_gb_order_group','stay_at_home']
for mandate in mandate_list:
    all_mandates[f'{mandate}'] = all_mandates.groupby('fips_code_text')[f'{mandate}'].ffill()
all_mandates['gathering_ban_binary'] = np.where(all_mandates['general_gb_order_group']=='No order found','No','Yes')
all_mandates_complete = all_mandates.dropna()

In [6]:
all_mandates_complete.head()

Unnamed: 0,state_code,county_name,fips_code_text,state_fips,county_fips,date,face_masks_required_in_public,bar_status,restaurant_status,general_gb_order_group,stay_at_home,gathering_ban_binary
387677,Alabama,Autauga,1001,1,1,2020-07-16,Public mask mandate,Open with social distancing/reduced seating/en...,Open with social distancing/reduced seating/en...,Bans gatherings of any size,Advisory/Recommendation,Yes
638448,Alabama,Autauga,1001,1,1,2020-07-17,Public mask mandate,Open with social distancing/reduced seating/en...,Open with social distancing/reduced seating/en...,Bans gatherings of any size,Advisory/Recommendation,Yes
467003,Alabama,Autauga,1001,1,1,2020-07-18,Public mask mandate,Open with social distancing/reduced seating/en...,Open with social distancing/reduced seating/en...,Bans gatherings of any size,Advisory/Recommendation,Yes
467004,Alabama,Autauga,1001,1,1,2020-07-19,Public mask mandate,Open with social distancing/reduced seating/en...,Open with social distancing/reduced seating/en...,Bans gatherings of any size,Advisory/Recommendation,Yes
1761369,Alabama,Autauga,1001,1,1,2020-07-20,Public mask mandate,Open with social distancing/reduced seating/en...,Open with social distancing/reduced seating/en...,Bans gatherings of any size,Advisory/Recommendation,Yes


In [7]:
len(all_mandates_complete)

1754697

# Other data

In [8]:
vaccinations = pd.read_parquet(r'cdc_vaccinations.parquet').rename({'fips_code':'fips_code_text'})
cases = pd.read_parquet('../cases_daily.parquet')
cases['fips_code_text'] = cases['fips'].astype(str).str.pad(width=5,fillchar='0')
cases['date'] = pd.to_datetime(cases['date'])

In [9]:
vaccine_hesitancy = pd.read_parquet('cdc_vaccine_hesitancy.parquet')
vaccine_hesitancy['fips_code_text'] = vaccine_hesitancy['fips_code_text'].astype(str).str.pad(width=5,fillchar='0')

In [10]:
non_epidemic = pd.read_csv('../non_epidemic_data.csv')
non_epidemic['fips_code_text'] = non_epidemic['fips'].astype(str).str.pad(width=5,fillchar='0')

In [11]:
vaccinations = vaccinations[vaccinations['fips_code'] != 'UNK']
vaccinations['fips_code_text'] = vaccinations['fips_code'].astype(int).astype(str).str.pad(width=5,fillchar='0')

# Create final df

In [13]:
final_df = all_mandates_complete.merge(vaccinations[['fips_code_text','date','pct_fully_vaccinated']], left_on=['fips_code_text','date'],right_on=['fips_code_text','date']
                                       ,how='left'
                  ).merge(cases,how='left'
                         ).merge(non_epidemic.drop(columns=['fips'])
                                ).merge(vaccine_hesitancy.drop(columns=['state_code','state_fips','county_fips','county_name','state'])
                                                              )
final_df['pct_fully_vaccinated'] = final_df['pct_fully_vaccinated'].astype(float).fillna(0)
convert_cols = ['percent_hispanic', 'percent_non_hispanic_black', 'percent_non_hispanic_white', 'estimated_hesitant', 'estimated_strongly_hesitant']
for col in convert_cols:
    final_df[f'{col}'] = final_df[f'{col}'].astype(float)
    

In [14]:
final_df['moving'] = final_df.groupby('fips_code_text')['cases'].transform(lambda x: x.rolling(7, 1).mean())
final_df['cases_per_100k'] = final_df['cases']/final_df['pop_estimate_2019'] * 100000
final_df['moving_cases_per_100k'] = final_df['moving']/final_df['pop_estimate_2019'] * 100000
for i in range(1,8):
    final_df[f'cases_lag_{i}'] = final_df.groupby('fips_code_text')['cases_per_100k'].shift(i)
    final_df[f'moving_cases_lag_{i}'] = final_df.groupby('fips_code_text')['moving_cases_per_100k'].shift(i)

In [15]:
modeling_df = pd.get_dummies(final_df,columns=['face_masks_required_in_public', 'bar_status',
       'restaurant_status','stay_at_home','gathering_ban_binary'])
modeling_df

Unnamed: 0,state_code,county_name,fips_code_text,state_fips,county_fips,date,general_gb_order_group,pct_fully_vaccinated,fips,cases,...,restaurant_status_NA,restaurant_status_Open with social distancing/reduced seating/enhanced sanitation,stay_at_home_Advisory/Recommendation,stay_at_home_Mandatory - all people,stay_at_home_Mandatory - all people in certain areas of state,stay_at_home_Mandatory - at-risk people only,stay_at_home_Mandatory - minors only,stay_at_home_No order found,gathering_ban_binary_No,gathering_ban_binary_Yes
0,Alabama,Autauga,01001,01,001,2020-07-16,Bans gatherings of any size,0.0,1001.0,24.0,...,0,1,1,0,0,0,0,0,0,1
1,Alabama,Autauga,01001,01,001,2020-07-17,Bans gatherings of any size,0.0,1001.0,9.0,...,0,1,1,0,0,0,0,0,0,1
2,Alabama,Autauga,01001,01,001,2020-07-18,Bans gatherings of any size,0.0,1001.0,38.0,...,0,1,1,0,0,0,0,0,0,1
3,Alabama,Autauga,01001,01,001,2020-07-19,Bans gatherings of any size,0.0,1001.0,15.0,...,0,1,1,0,0,0,0,0,0,1
4,Alabama,Autauga,01001,01,001,2020-07-20,Bans gatherings of any size,0.0,1001.0,15.0,...,0,1,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1687318,Wyoming,Weston,56045,56,045,2022-06-01,No order found,38.2,56045.0,0.0,...,0,0,0,0,0,0,0,1,1,0
1687319,Wyoming,Weston,56045,56,045,2022-06-02,No order found,38.2,56045.0,0.0,...,0,0,0,0,0,0,0,1,1,0
1687320,Wyoming,Weston,56045,56,045,2022-06-03,No order found,38.3,,,...,0,0,0,0,0,0,0,1,1,0
1687321,Wyoming,Weston,56045,56,045,2022-06-04,No order found,38.3,,,...,0,0,0,0,0,0,0,1,1,0


In [16]:
modeling_vars = ['unemployment_rate_2020',
'median_household_income_2019',
'poverty_frac_2019',
'no_high_school',
'high_school_only',
'college_only',
'voted_biden',
'percent_hispanic',
'percent_non_hispanic_black',
'percent_non_hispanic_white',
'face_masks_required_in_public_Public mask mandate',
'moving_cases_lag_1',
'moving_cases_lag_2',
'moving_cases_lag_3',
'moving_cases_lag_4',
'moving_cases_lag_5',
'moving_cases_lag_6',
'estimated_hesitant',
'estimated_strongly_hesitant',
'pct_fully_vaccinated',
]


In [17]:
modeling_df_final = modeling_df[~modeling_df[modeling_vars].isna().any(axis=1)]

In [19]:
modeling_df_final.head()

Unnamed: 0,state_code,county_name,fips_code_text,state_fips,county_fips,date,general_gb_order_group,pct_fully_vaccinated,fips,cases,...,restaurant_status_NA,restaurant_status_Open with social distancing/reduced seating/enhanced sanitation,stay_at_home_Advisory/Recommendation,stay_at_home_Mandatory - all people,stay_at_home_Mandatory - all people in certain areas of state,stay_at_home_Mandatory - at-risk people only,stay_at_home_Mandatory - minors only,stay_at_home_No order found,gathering_ban_binary_No,gathering_ban_binary_Yes
6,Alabama,Autauga,1001,1,1,2020-07-22,Bans gatherings of any size,0.0,1001.0,21.0,...,0,1,1,0,0,0,0,0,0,1
7,Alabama,Autauga,1001,1,1,2020-07-23,Bans gatherings of any size,0.0,1001.0,19.0,...,0,1,1,0,0,0,0,0,0,1
8,Alabama,Autauga,1001,1,1,2020-07-24,Bans gatherings of any size,0.0,1001.0,16.0,...,0,1,1,0,0,0,0,0,0,1
9,Alabama,Autauga,1001,1,1,2020-07-25,Bans gatherings of any size,0.0,1001.0,11.0,...,0,1,1,0,0,0,0,0,0,1
10,Alabama,Autauga,1001,1,1,2020-07-26,Bans gatherings of any size,0.0,1001.0,10.0,...,0,1,1,0,0,0,0,0,0,1


In [20]:
print(len(modeling_df_final))

1667360


## Model

In [21]:
from linearmodels import PanelOLS
#from linearmodels import RandomEffects
#!pip install statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [84]:
# modeling_df_final['state_code'#].value_counts()
modeling_df_final.columns = modeling_df_final.columns.str.replace(' ', '_')
modeling_df_final.columns = modeling_df_final.columns.str.replace('/', '_')
modeling_df_final.columns = modeling_df_final.columns.str.replace('-', '_')
modeling_df_final['date'] = pd.to_datetime(modeling_df_final['date'])
modeling_df_final['weekday'] = modeling_df_final['date'].dt.weekday.astype(str)
modeling_df_final['month'] = modeling_df_final['date'].dt.month.astype(str)
modeling_df_final['year'] = modeling_df_final['date'].dt.year.astype(str)
modeling_df_final = pd.get_dummies(modeling_df_final,columns=['weekday','year','month'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modeling_df_final['date'] = pd.to_datetime(modeling_df_final['date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modeling_df_final['weekday'] = modeling_df_final['date'].dt.weekday.astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modeling_df_final['month'] = modeling_df_final['date'].

In [78]:
# sample_df = modeling_df_final[modeling_df['state_code'].isin(['Alabama','California','District of Columbia','Mississippi'])].copy()


# sample_df

In [89]:
formula = '''
moving_cases_per_100k ~
moving_cases_lag_1
+moving_cases_lag_2
+moving_cases_lag_3
+moving_cases_lag_4
+moving_cases_lag_5
+ unemployment_rate_2020
+ median_household_income_2019
+ poverty_frac_2019
+ no_high_school
+ high_school_only
+ college_only
+ voted_biden
+ percent_hispanic
+ percent_non_hispanic_black
+ percent_non_hispanic_white
+ face_masks_required_in_public_Public_mask_mandate
+ stay_at_home_Mandatory___all_people
+ gathering_ban_binary_Yes
+ pct_fully_vaccinated
+ weekday_0
+ weekday_1
+ weekday_2
+ weekday_3
+ weekday_4
+ weekday_5
+ year_2020
+ year_2021
+ month_1
+ month_10
+ month_11
+ month_2
+ month_3
+ month_4
+ month_5
+ month_6
+ month_7
+ month_8
+ month_9
'''

In [90]:
modeling_df_final.groupby(['face_masks_required_in_public_Public_mask_mandate'])['moving_cases_per_100k'].mean()

face_masks_required_in_public_Public_mask_mandate
0    37.092261
1    32.939779
Name: moving_cases_per_100k, dtype: float64

In [91]:
model = smf.ols(formula,modeling_df_final)
result = model.fit()
print(result.summary2())

                                  Results: Ordinary least squares
Model:                      OLS                           Adj. R-squared:             0.937        
Dependent Variable:         moving_cases_per_100k         AIC:                        13236211.7956
Date:                       2022-06-07 16:22              BIC:                        13236692.5389
No. Observations:           1667360                       Log-Likelihood:             -6.6181e+06  
Df Model:                   38                            F-statistic:                6.502e+05    
Df Residuals:               1667321                       Prob (F-statistic):         0.00         
R-squared:                  0.937                         Scale:                      164.11       
---------------------------------------------------------------------------------------------------
                                                   Coef.  Std.Err.     t     P>|t|   [0.025  0.975]
----------------------------------

In [92]:
%%time 
model = smf.mixedlm(formula,modeling_df_final, groups=modeling_df_final["fips_code_text"])
result = model.fit(method=["bfgs"])
print(result.summary())


                            Mixed Linear Model Regression Results
Model:                     MixedLM          Dependent Variable:          moving_cases_per_100k
No. Observations:          1667360          Method:                      REML                 
No. Groups:                2422             Scale:                       164.0974             
Min. group size:           507              Log-Likelihood:              -6618187.4432        
Max. group size:           781              Converged:                   Yes                  
Mean group size:           688.4                                                              
----------------------------------------------------------------------------------------------
                                                  Coef.  Std.Err.    z     P>|z| [0.025 0.975]
----------------------------------------------------------------------------------------------
Intercept                                          3.481    0.353    9.858 0.00