In [191]:
from linearmodels.panel import PooledOLS, PanelOLS

import statsmodels.api as sm
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf

from statsmodels.iolib.summary2 import summary_col

## Read in data

In [192]:
# Load the cleaned daily panel, parse the "date" column, and use it as the index.
us_daily = (
    pd.read_csv("data/cleaned/daily_df.csv", index_col=0)
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .set_index("date")
)

In [193]:
# Average every column within each (month, state) group. pd.Grouper(freq='M')
# bins the "date" index by month; reset_index() then turns the (date, state)
# group keys back into ordinary columns.
# NOTE(review): recent pandas releases prefer the 'ME' alias for month-end bins;
# 'M' is kept so the cell still runs on the pandas version this notebook used.
us_monthly = (
    us_daily
    .groupby([pd.Grouper(freq='M'), 'state'])
    .mean()
    .reset_index()
)

In [194]:
# Read the state-level misperception table; the first CSV column is the index.
misperception_raw = pd.read_csv("data/cleaned/misperception_state_inferred.csv", index_col=0)

# Round-trip through records so the index becomes a regular column, then label
# the two resulting columns.
micperceptions = pd.DataFrame(misperception_raw.to_records())
micperceptions.columns = ["state", "pc_misperception"]

# The share is stored as text with a trailing '%'; strip it and rescale by 100.
misperception_text = micperceptions["pc_misperception"].str.rstrip('%')
micperceptions["pc_misperception"] = misperception_text.astype('float') / 100.0

In [195]:
# Keep rows dated 2020-03-01 through 2021-01-31 (inclusive) and average each
# column per state to get one cross-sectional row per state.
on_or_after_start = us_daily.index >= "2020-03-01"
on_or_before_end = us_daily.index <= "2021-01-31"
us_cross_sectional = (
    us_daily
    .loc[on_or_after_start & on_or_before_end]
    .groupby(["state"])
    .mean()
)

In [196]:
# Attach pc_misperception to each state; the left join keeps states that have
# no misperception estimate.
# NOTE(review): re-running this cell merges again onto the already-merged frame.
us_cross_sectional = us_cross_sectional.merge(
    micperceptions,
    how="left",
    on="state",
)

In [197]:
# State name / abbreviation lookup, with its two columns relabelled.
US_state_abb = pd.read_csv("data/US_state_abb.csv")
US_state_abb.columns = ["state", "state_abb"]

# Read the reported-compliance table (first CSV column as index) and add the
# `state` column by left-joining the lookup on the abbreviation.
reported_compliance = (
    pd.read_csv("data/covidstates/beh_2.csv", index_col=0)
    .merge(US_state_abb, on="state_abb", how="left")
)


In [198]:
# Positional relabelling of all nine columns of the merged compliance table.
# NOTE(review): this relies on the column order produced by the merge — confirm
# it still matches if the source CSV changes.
compliance_column_names = [
    "state_abb",
    "not_at_all",
    "not_very",
    "somewhat",
    "very",
    "error_margin",
    "N",
    "wave",
    "state",
]
reported_compliance.columns = compliance_column_names

In [199]:
# Average the numeric compliance columns per state. numeric_only=True skips
# non-numeric columns (presumably state_abb holds text — confirm), which would
# otherwise make .mean() raise TypeError on pandas >= 2.0; on older pandas this
# matches the previous default behaviour.
reported_compliance_cross_sectional = (
    reported_compliance
    .groupby(["state"])
    .mean(numeric_only=True)
)

# Left join so every row of us_cross_sectional is kept even when a state has no
# compliance data.
us_cross_sectional = pd.merge(us_cross_sectional, reported_compliance_cross_sectional, on="state", how="left")

## Disease outcomes

In [200]:
# List the columns available in the merged cross-sectional frame.
us_cross_sectional.columns

Index(['state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
       'commercialScore', 'negativeRegularScore', 'negativeScore',
       'positive

In [201]:
# deaths_pc: the state's averaged deathIncrease divided by its pop_2019.
us_cross_sectional["deaths_pc"] = us_cross_sectional["deathIncrease"].div(
    us_cross_sectional["pop_2019"]
)

In [202]:
# hospitalized_pc: the state's averaged hospitalizedIncrease divided by its pop_2019.
us_cross_sectional["hospitalized_pc"] = us_cross_sectional["hospitalizedIncrease"].div(
    us_cross_sectional["pop_2019"]
)

In [203]:
# positivity_rate: averaged positiveIncrease over averaged totalTestResultsIncrease.
us_cross_sectional["positivity_rate"] = us_cross_sectional["positiveIncrease"].div(
    us_cross_sectional["totalTestResultsIncrease"]
)

There is generally a positive relationship between the level of misperceptions and the severity of disease outcomes.

In [204]:
# OLS of positivity_rate on pc_misperception, controlling for StringencyIndex.
cross_sectional_reg_1_posrate = smf.ols(
    formula='positivity_rate ~ pc_misperception + StringencyIndex',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_1_posrate.summary()

0,1,2,3
Dep. Variable:,positivity_rate,R-squared:,0.459
Model:,OLS,Adj. R-squared:,0.436
Method:,Least Squares,F-statistic:,19.96
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,5.31e-07
Time:,17:54:36,Log-Likelihood:,86.985
No. Observations:,50,AIC:,-168.0
Df Residuals:,47,BIC:,-162.2
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2246,0.063,3.581,0.001,0.098,0.351
pc_misperception,0.3886,0.334,1.165,0.250,-0.283,1.060
StringencyIndex,-0.0036,0.001,-5.704,0.000,-0.005,-0.002

0,1,2,3
Omnibus:,27.715,Durbin-Watson:,1.867
Prob(Omnibus):,0.0,Jarque-Bera (JB):,59.888
Skew:,1.582,Prob(JB):,9.9e-14
Kurtosis:,7.328,Cond. No.,2800.0


In [205]:
# Store the residuals of the first positivity-rate regression next to the state rows.
posrate_residuals = cross_sectional_reg_1_posrate.resid
us_cross_sectional["res"] = posrate_residuals

In [206]:
# States ordered from the lowest to the highest pc_misperception.
misperception_by_state = us_cross_sectional[["state", "pc_misperception"]]
misperception_by_state.sort_values(by="pc_misperception")

Unnamed: 0,state,pc_misperception
49,Vermont,0.09
13,Hawaii,0.09
55,Wyoming,0.11
41,Pennsylvania,0.11
40,Oregon,0.11
7,Connecticut,0.11
8,Delaware,0.11
14,Idaho,0.11
23,Massachusetts,0.11
54,Wisconsin,0.125


In [207]:
# Bivariate OLS of hospitalized_pc on pc_misperception (no controls).
cross_sectional_reg_1_hosp = smf.ols(
    formula='hospitalized_pc ~ pc_misperception',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_1_hosp.summary()

0,1,2,3
Dep. Variable:,hospitalized_pc,R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.022
Method:,Least Squares,F-statistic:,2.09
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.155
Time:,17:54:38,Log-Likelihood:,522.52
No. Observations:,50,AIC:,-1041.0
Df Residuals:,48,BIC:,-1037.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-1.617e-06,7.36e-06,-0.220,0.827,-1.64e-05,1.32e-05
pc_misperception,7.604e-05,5.26e-05,1.446,0.155,-2.97e-05,0.000

0,1,2,3
Omnibus:,2.557,Durbin-Watson:,2.33
Prob(Omnibus):,0.279,Jarque-Bera (JB):,1.684
Skew:,0.218,Prob(JB):,0.431
Kurtosis:,2.214,Cond. No.,53.1


In [208]:
# Bivariate OLS of deaths_pc on pc_misperception (no controls).
cross_sectional_reg_1_dea = smf.ols(
    formula='deaths_pc ~ pc_misperception',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_1_dea.summary()

0,1,2,3
Dep. Variable:,deaths_pc,R-squared:,0.085
Model:,OLS,Adj. R-squared:,0.066
Method:,Least Squares,F-statistic:,4.462
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.0399
Time:,17:54:39,Log-Likelihood:,600.36
No. Observations:,50,AIC:,-1197.0
Df Residuals:,48,BIC:,-1193.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.258e-07,1.55e-06,0.339,0.736,-2.59e-06,3.65e-06
pc_misperception,2.343e-05,1.11e-05,2.112,0.040,1.13e-06,4.57e-05

0,1,2,3
Omnibus:,0.662,Durbin-Watson:,1.949
Prob(Omnibus):,0.718,Jarque-Bera (JB):,0.754
Skew:,0.242,Prob(JB):,0.686
Kurtosis:,2.643,Cond. No.,53.1


In [209]:
# OLS of positivity_rate on pc_misperception with median household income and
# StringencyIndex as controls.
cross_sectional_reg_2_posrate = smf.ols(
    formula='positivity_rate ~ pc_misperception + Median_Household_Income_2019 + StringencyIndex',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_2_posrate.summary()

0,1,2,3
Dep. Variable:,positivity_rate,R-squared:,0.492
Model:,OLS,Adj. R-squared:,0.459
Method:,Least Squares,F-statistic:,14.84
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,6.8e-07
Time:,17:54:39,Log-Likelihood:,88.54
No. Observations:,50,AIC:,-169.1
Df Residuals:,46,BIC:,-161.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3140,0.081,3.899,0.000,0.152,0.476
pc_misperception,0.1727,0.350,0.493,0.624,-0.532,0.878
Median_Household_Income_2019,-1.118e-06,6.51e-07,-1.718,0.093,-2.43e-06,1.92e-07
StringencyIndex,-0.0033,0.001,-5.229,0.000,-0.005,-0.002

0,1,2,3
Omnibus:,25.579,Durbin-Watson:,1.832
Prob(Omnibus):,0.0,Jarque-Bera (JB):,49.784
Skew:,1.508,Prob(JB):,1.55e-11
Kurtosis:,6.848,Cond. No.,3880000.0


In [210]:
# OLS of deaths_pc on pc_misperception, controlling for StringencyIndex.
cross_sectional_reg_2_dea = smf.ols(
    formula='deaths_pc ~ pc_misperception + StringencyIndex',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_2_dea.summary()

0,1,2,3
Dep. Variable:,deaths_pc,R-squared:,0.106
Model:,OLS,Adj. R-squared:,0.068
Method:,Least Squares,F-statistic:,2.778
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.0724
Time:,17:54:48,Log-Likelihood:,600.93
No. Observations:,50,AIC:,-1196.0
Df Residuals:,47,BIC:,-1190.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.086e-06,2.15e-06,0.968,0.338,-2.25e-06,6.42e-06
pc_misperception,2.037e-05,1.15e-05,1.777,0.082,-2.69e-06,4.34e-05
StringencyIndex,-2.259e-08,2.17e-08,-1.042,0.303,-6.62e-08,2.1e-08

0,1,2,3
Omnibus:,1.184,Durbin-Watson:,1.84
Prob(Omnibus):,0.553,Jarque-Bera (JB):,1.069
Skew:,0.344,Prob(JB):,0.586
Kurtosis:,2.8,Cond. No.,2800.0


In [211]:
# OLS of hospitalized_pc on pc_misperception, controlling for StringencyIndex.
cross_sectional_reg_2_hosp = smf.ols(
    formula='hospitalized_pc ~ pc_misperception + StringencyIndex',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_2_hosp.summary()

0,1,2,3
Dep. Variable:,hospitalized_pc,R-squared:,0.072
Model:,OLS,Adj. R-squared:,0.032
Method:,Least Squares,F-statistic:,1.811
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.175
Time:,17:54:48,Log-Likelihood:,523.32
No. Observations:,50,AIC:,-1041.0
Df Residuals:,47,BIC:,-1035.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.069e-06,1.02e-05,0.695,0.491,-1.34e-05,2.75e-05
pc_misperception,5.9e-05,5.41e-05,1.090,0.281,-4.99e-05,0.000
StringencyIndex,-1.258e-07,1.02e-07,-1.229,0.225,-3.32e-07,8.01e-08

0,1,2,3
Omnibus:,1.929,Durbin-Watson:,2.266
Prob(Omnibus):,0.381,Jarque-Bera (JB):,1.441
Skew:,0.208,Prob(JB):,0.486
Kurtosis:,2.28,Cond. No.,2800.0


In [212]:
# OLS of positivity_rate on pc_misperception, controlling for Urbanization_rate.
cross_sectional_reg_3_posrate = smf.ols(
    formula='positivity_rate ~ pc_misperception + Urbanization_rate',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_3_posrate.summary()

0,1,2,3
Dep. Variable:,positivity_rate,R-squared:,0.109
Model:,OLS,Adj. R-squared:,0.071
Method:,Least Squares,F-statistic:,2.88
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.0661
Time:,17:54:49,Log-Likelihood:,74.504
No. Observations:,50,AIC:,-143.0
Df Residuals:,47,BIC:,-137.3
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0297,0.075,0.397,0.693,-0.121,0.180
pc_misperception,0.8232,0.417,1.975,0.054,-0.015,1.661
Urbanization_rate,-0.0006,0.001,-1.131,0.264,-0.002,0.000

0,1,2,3
Omnibus:,23.725,Durbin-Watson:,1.876
Prob(Omnibus):,0.0,Jarque-Bera (JB):,37.885
Skew:,1.537,Prob(JB):,5.93e-09
Kurtosis:,5.955,Cond. No.,3970.0


In [213]:
# OLS of deaths_pc on pc_misperception, controlling for Urbanization_rate.
cross_sectional_reg_3_dea = smf.ols(
    formula='deaths_pc ~ pc_misperception + Urbanization_rate ',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_3_dea.summary()

0,1,2,3
Dep. Variable:,deaths_pc,R-squared:,0.14
Model:,OLS,Adj. R-squared:,0.104
Method:,Least Squares,F-statistic:,3.83
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.0288
Time:,17:54:49,Log-Likelihood:,601.91
No. Observations:,50,AIC:,-1198.0
Df Residuals:,47,BIC:,-1192.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-1.631e-06,1.96e-06,-0.830,0.410,-5.58e-06,2.32e-06
pc_misperception,2.556e-05,1.09e-05,2.338,0.024,3.57e-06,4.76e-05
Urbanization_rate,2.528e-08,1.46e-08,1.735,0.089,-4.03e-09,5.46e-08

0,1,2,3
Omnibus:,0.714,Durbin-Watson:,1.975
Prob(Omnibus):,0.7,Jarque-Bera (JB):,0.766
Skew:,0.103,Prob(JB):,0.682
Kurtosis:,2.43,Cond. No.,3970.0


In [214]:
# OLS of hospitalized_pc on pc_misperception with median household income and
# Urbanization_rate as controls.
cross_sectional_reg_3_hosp = smf.ols(
    formula='hospitalized_pc ~ pc_misperception + Median_Household_Income_2019 + Urbanization_rate',
    data=us_cross_sectional,
).fit()
cross_sectional_reg_3_hosp.summary()

0,1,2,3
Dep. Variable:,hospitalized_pc,R-squared:,0.052
Model:,OLS,Adj. R-squared:,-0.01
Method:,Least Squares,F-statistic:,0.8401
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.479
Time:,17:54:49,Log-Likelihood:,522.79
No. Observations:,50,AIC:,-1038.0
Df Residuals:,46,BIC:,-1030.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.647e-06,1.27e-05,-0.523,0.604,-3.23e-05,1.9e-05
pc_misperception,8.263e-05,5.96e-05,1.386,0.172,-3.74e-05,0.000
Median_Household_Income_2019,1.239e-11,1.35e-10,0.092,0.927,-2.59e-10,2.84e-10
Urbanization_rate,4.496e-08,9.06e-08,0.496,0.622,-1.37e-07,2.27e-07

0,1,2,3
Omnibus:,1.812,Durbin-Watson:,2.32
Prob(Omnibus):,0.404,Jarque-Bera (JB):,1.366
Skew:,0.191,Prob(JB):,0.505
Kurtosis:,2.286,Cond. No.,3890000.0


Nothing really interesting here. 

## Relationship between misperception and mobility

In [219]:
# OLS of the `very` compliance column on pc_misperception with stringency,
# urbanization and temperature controls.
# NOTE(review): the variable name ends in "_posrate" although the dependent
# variable here is `very`, not positivity_rate.
cross_sectional_reg_4_posrate = smf.ols(
    formula="very ~ pc_misperception + StringencyIndex + Urbanization_rate + AverageTemperature",
    data=us_cross_sectional,
).fit()
cross_sectional_reg_4_posrate.summary()

0,1,2,3
Dep. Variable:,very,R-squared:,0.558
Model:,OLS,Adj. R-squared:,0.519
Method:,Least Squares,F-statistic:,14.22
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,1.41e-07
Time:,17:55:41,Log-Likelihood:,-140.01
No. Observations:,50,AIC:,290.0
Df Residuals:,45,BIC:,299.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,29.4055,6.575,4.472,0.000,16.162,42.649
pc_misperception,48.3416,33.481,1.444,0.156,-19.092,115.775
StringencyIndex,0.3588,0.062,5.743,0.000,0.233,0.485
Urbanization_rate,0.1037,0.044,2.339,0.024,0.014,0.193
AverageTemperature,0.2346,0.133,1.758,0.086,-0.034,0.503

0,1,2,3
Omnibus:,1.126,Durbin-Watson:,1.508
Prob(Omnibus):,0.57,Jarque-Bera (JB):,0.849
Skew:,-0.318,Prob(JB):,0.654
Kurtosis:,2.948,Cond. No.,5200.0


In [119]:
# Pairwise correlations among the four regressors of the `very` regression, as
# a quick collinearity check. The recorded output of this cell is a correlation
# matrix, so .corr() is required to reproduce it; without it the cell would
# display the raw columns instead.
regressor_columns = [
    "pc_misperception",
    "StringencyIndex",
    "Urbanization_rate",
    "AverageTemperature",
]
us_cross_sectional[regressor_columns].corr()

Unnamed: 0,pc_misperception,StringencyIndex,Urbanization_rate,AverageTemperature
pc_misperception,1.0,-0.256171,-0.112654,0.263368
StringencyIndex,-0.256171,1.0,0.273593,0.013547
Urbanization_rate,-0.112654,0.273593,1.0,0.229481
AverageTemperature,0.263368,0.013547,0.229481,1.0


In [112]:
# Store the residuals of the `very` regression in "res".
# NOTE(review): this replaces the "res" values written earlier from
# cross_sectional_reg_1_posrate.
compliance_residuals = cross_sectional_reg_4_posrate.resid
us_cross_sectional["res"] = compliance_residuals

In [113]:
# States ordered by residual (most negative first), shown with the mobility
# change and misperception share for comparison.
residual_view_columns = [
    "state",
    "res",
    "non_residential_percent_change_from_baseline",
    "pc_misperception",
]
us_cross_sectional[residual_view_columns].sort_values(by="res")

Unnamed: 0,state,res,non_residential_percent_change_from_baseline,pc_misperception
27,Nevada,-16.341878,-24.681682,0.15
44,Vermont,-14.619181,-5.500348,0.09
33,North Dakota,-14.535208,0.845921,0.145
1,Alaska,-10.01652,3.1,0.16
2,Arizona,-9.67757,-21.84491,0.14
10,Hawaii,-7.455777,-39.502994,0.09
31,New York,-7.106936,-21.045833,0.17
5,Colorado,-6.626982,-11.531138,0.135
0,Alabama,-5.064805,-5.309366,0.16
4,California,-4.803034,-26.760479,0.15


In [91]:
# Daily non-residential mobility series for Alaska only.
is_alaska = us_daily["state"] == "Alaska"
us_daily.loc[is_alaska, "non_residential_percent_change_from_baseline"]

date
2021-02-17     NaN
2021-02-16     NaN
2021-02-15     NaN
2021-02-14     NaN
2021-02-13     NaN
              ... 
2020-03-10     5.0
2020-03-09     3.6
2020-03-08    13.2
2020-03-07    10.2
2020-03-06     6.4
Name: non_residential_percent_change_from_baseline, Length: 349, dtype: float64

In [None]:
# Mobility "percent change from baseline" column names, collected in a list.
# The original cell was a bare string followed by indented continuation lines,
# which is an IndentationError and stops a Restart & Run All.
# NOTE(review): presumably these are columns of us_daily — confirm before use.
mobility_columns = [
    'retail_and_recreation_percent_change_from_baseline',
    'grocery_and_pharmacy_percent_change_from_baseline',
    'parks_percent_change_from_baseline',
    'transit_stations_percent_change_from_baseline',
    'workplaces_percent_change_from_baseline',
    'residential_percent_change_from_baseline',
]
mobility_columns