# EDA of Google Trends data

In [91]:
from linearmodels.panel import PooledOLS, PanelOLS

import statsmodels.api as sm
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf

In [2]:
trend_month_state = pd.read_csv("data/cleaned/trend_month_state.csv", index_col=0)
trend_month_DMA = pd.read_csv("data/cleaned/trend_month_DMA.csv", index_col=0)
trend_all_time_state = pd.read_csv("data/cleaned/trend_all_time_state.csv", index_col=0)
trend_all_time_DMA = pd.read_csv("data/cleaned/trend_all_time_DMA.csv", index_col=0)

In [3]:
trend_all_time_state.replace(0, np.nan, inplace = True)
trend_month_state.replace(0, np.nan, inplace = True)

## EDA

In [4]:
trend_all_time_state.head()

Unnamed: 0,date_range,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
0,2020-03-01 2021-01-31,Alabama,52.0,38.0,80.0,57.0,39.0
1,2020-03-01 2021-01-31,Alaska,90.0,94.0,49.0,77.0,46.0
2,2020-03-01 2021-01-31,Arizona,63.0,48.0,79.0,53.0,49.0
3,2020-03-01 2021-01-31,Arkansas,51.0,46.0,79.0,51.0,52.0
4,2020-03-01 2021-01-31,California,54.0,49.0,56.0,58.0,49.0


In [5]:
trend_all_time_state[
    ["covid_conspiracy", "covid_hoax", "pizzagate", "plandemic", "wuhan_lab"]
].corr(method="pearson")

Unnamed: 0,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
covid_conspiracy,1.0,0.253764,-0.120353,0.487821,0.135147
covid_hoax,0.253764,1.0,-0.112811,0.317223,0.234428
pizzagate,-0.120353,-0.112811,1.0,-0.239771,-0.300099
plandemic,0.487821,0.317223,-0.239771,1.0,0.128978
wuhan_lab,0.135147,0.234428,-0.300099,0.128978,1.0


It turns out that popularity of COVID-related conspiracy theories is not strongly correlated with "pizzagate", which is not related to COVID-19 per se. Among all covid-19 related keywords, there is a positive, yet not necessarily strong correlation. 

### Principle component analyses

In [6]:
pca_pipe =  Pipeline([('inputer', SimpleImputer()), ('PCA', PCA(n_components=2))])
principalComponents = pca_pipe.fit_transform(
    trend_all_time_state[["covid_conspiracy", "covid_hoax", "plandemic", "wuhan_lab"]]
)
principalDf = pd.DataFrame(data=principalComponents)

In [7]:
pca_pipe["PCA"].explained_variance_ratio_

array([0.46055991, 0.23254269])

In [8]:
pca_pipe["PCA"].components_

array([[ 0.62681881,  0.47529325,  0.54963197,  0.28124579],
       [-0.43683202,  0.43500537, -0.25854643,  0.74370818]])

## Panel analyses

## Develop an index

### with "Wuhan Lab"

In [9]:
trend_all_time_state["index_1"] = trend_all_time_state[["covid_hoax", "covid_conspiracy", "plandemic", "wuhan_lab"]].mean(axis = 1)

### without "Wuhan Lab"

In [10]:
trend_all_time_state["index_2"] = trend_all_time_state[["covid_hoax", "covid_conspiracy", "plandemic"]].mean(axis = 1)

In [11]:
trend_all_time_state.sort_values("index_2", ascending = False)

Unnamed: 0,date_range,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,index_1,index_2
1,2020-03-01 2021-01-31,Alaska,90.0,94.0,49.0,77.0,46.0,76.75,87.0
29,2020-03-01 2021-01-31,New Hampshire,92.0,78.0,55.0,65.0,91.0,81.5,78.333333
45,2020-03-01 2021-01-31,Vermont,55.0,100.0,48.0,79.0,100.0,83.5,78.0
23,2020-03-01 2021-01-31,Minnesota,82.0,50.0,57.0,94.0,52.0,69.5,75.333333
34,2020-03-01 2021-01-31,North Dakota,100.0,39.0,54.0,85.0,38.0,65.5,74.666667
27,2020-03-01 2021-01-31,Nebraska,78.0,58.0,72.0,81.0,39.0,64.0,72.333333
50,2020-03-01 2021-01-31,Wyoming,71.0,,61.0,72.0,32.0,58.333333,71.5
37,2020-03-01 2021-01-31,Oregon,70.0,65.0,67.0,75.0,43.0,63.25,70.0
5,2020-03-01 2021-01-31,Colorado,77.0,67.0,63.0,66.0,35.0,61.25,70.0
12,2020-03-01 2021-01-31,Idaho,53.0,89.0,69.0,66.0,42.0,62.5,69.333333


## Regression analyses

### Relationship between conspiracy theory popularity and compliance (mobility)

In [12]:
us_daily = pd.read_csv("data/cleaned/daily_df.csv", index_col=0)
us_daily["date"] = pd.to_datetime(us_daily["date"])
us_daily = us_daily.set_index("date")

In [13]:
us_monthly = us_daily.groupby([pd.Grouper(freq='M'), 'state']).mean()
us_monthly

Unnamed: 0_level_0,Unnamed: 1_level_0,positive,probableCases,negative,pending,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,C6_Stay at home requirements,C6_Flag,C7_Restrictions on internal movement,C7_Flag,C8_International travel controls,ConfirmedCases,ConfirmedDeaths,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex
date,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-31,Florida,0.000000,,,,2.000000e+00,,,,,,...,0.0,,0.0,,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-31,Massachusetts,,,,,2.700000e+00,,,,,,...,0.6,1.0,0.0,,0.0,0.300000,0.000000,2.220000,1.332000,1.536000
2020-01-31,Virginia,,,,,,,,,,,...,0.0,,0.0,,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-31,Washington,1.333333,,,,0.000000e+00,,,,,,...,0.0,,0.0,,0.0,1.000000,0.000000,0.000000,3.394211,3.913684
2020-02-29,Florida,0.000000,,,,1.024138e+01,,,,,,...,0.0,,0.0,,0.0,0.000000,0.000000,0.958621,0.574138,0.663793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-28,Virginia,533521.941176,108944.411765,,251.411765,5.468171e+06,2196.411765,22329.588235,446.235294,,280.117647,...,2.0,1.0,0.0,,0.0,533521.941176,6835.823529,53.373529,53.283529,57.631176
2021-02-28,Washington,321918.647059,15809.117647,,,4.761709e+06,740.529412,18326.117647,175.411765,,69.176471,...,1.0,0.0,1.0,0.0,2.0,323048.647059,4541.058824,62.960000,63.220000,61.410000
2021-02-28,West Virginia,125328.235294,24920.941176,1.907528e+06,,2.032857e+06,373.470588,,100.058824,,47.000000,...,1.0,0.0,1.0,1.0,2.0,125328.235294,2144.235294,46.300000,50.110000,57.820000
2021-02-28,Wisconsin,602149.941176,51173.529412,2.543102e+06,38.176471,6.377024e+06,531.882353,24978.058824,137.176471,2206.823529,,...,1.0,1.0,1.0,1.0,0.0,602149.941176,6648.176471,47.690000,54.112000,58.588000


In [14]:
us_monthly = pd.DataFrame(us_monthly.to_records())

In [15]:
trend_month_state["date"] = pd.to_datetime(trend_month_state["date_range"].str.slice(start = -10))

In [16]:
trend_month_state = trend_month_state.drop("date_range", axis = 1)

In [17]:
trend_month_state

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,date
0,Alabama,29.0,10.0,76.0,,33.0,2020-03-31
1,Alaska,73.0,80.0,97.0,,100.0,2020-03-31
2,Arizona,34.0,25.0,60.0,,31.0,2020-03-31
3,Arkansas,100.0,,11.0,,34.0,2020-03-31
4,California,41.0,18.0,44.0,18.0,42.0,2020-03-31
...,...,...,...,...,...,...,...
556,Virginia,19.0,14.0,10.0,5.0,14.0,2021-01-31
557,Washington,23.0,4.0,12.0,,52.0,2021-01-31
558,West Virginia,,,35.0,,,2021-01-31
559,Wisconsin,11.0,6.0,45.0,8.0,,2021-01-31


In [18]:
us_monthly = pd.merge(trend_month_state, us_monthly, on = ["state", "date"], how = "left")

In [19]:
us_monthly = us_monthly.set_index("date")

#### Cross-sectional

In [20]:
us_cross_sectional = us_daily.groupby("state").mean()

In [21]:
us_cross_sectional.head()

Unnamed: 0_level_0,positive,probableCases,negative,pending,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,C6_Stay at home requirements,C6_Flag,C7_Restrictions on internal movement,C7_Flag,C8_International travel controls,ConfirmedCases,ConfirmedDeaths,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,152878.247126,32034.665399,812813.5,33.0,934423.2,1184.939577,16614.2125,,1531.476038,,...,1.057803,0.966565,0.0,,0.0,153084.663793,2474.939655,35.718439,40.565723,42.81604
Alaska,15237.02071,,,8.5,510032.2,56.078431,389.485549,,,7.664151,...,0.758621,0.116883,1.706897,0.92638,1.936782,15512.859599,75.020057,53.132787,52.47181,53.053736
American Samoa,0.0,,1253.12,10.5,1197.673,,,,,,...,,,,,,,,,,
Arizona,230197.700855,12375.980159,1094707.0,44.24,2228371.0,1982.344051,18215.883191,531.459807,,347.585209,...,0.643678,1.0,0.212644,1.0,0.212644,230206.666667,4653.578348,42.127989,47.673678,46.597615
Arkansas,91312.756447,24916.214286,839528.9,47.588235,918856.8,538.387387,5427.229102,308.421384,43.0,100.00303,...,0.282421,1.0,0.236311,1.0,0.236311,91338.467049,1449.977077,44.070519,45.216657,47.682075


In [22]:
trend_all_time_state.drop("date_range", axis = 1)

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,index_1,index_2
0,Alabama,52.0,38.0,80.0,57.0,39.0,46.5,49.0
1,Alaska,90.0,94.0,49.0,77.0,46.0,76.75,87.0
2,Arizona,63.0,48.0,79.0,53.0,49.0,53.25,54.666667
3,Arkansas,51.0,46.0,79.0,51.0,52.0,50.0,49.333333
4,California,54.0,49.0,56.0,58.0,49.0,52.5,53.666667
5,Colorado,77.0,67.0,63.0,66.0,35.0,61.25,70.0
6,Connecticut,39.0,49.0,48.0,63.0,64.0,53.75,50.333333
7,Delaware,17.0,60.0,78.0,28.0,60.0,41.25,35.0
8,District of Columbia,60.0,45.0,28.0,85.0,79.0,67.25,63.333333
9,Florida,50.0,39.0,59.0,45.0,44.0,44.5,44.666667


In [23]:
us_cross_sectional = pd.merge(us_cross_sectional, trend_all_time_state.drop("date_range", axis = 1), on="state")

In [24]:
us_cross_sectional[
    [
        "covid_conspiracy",
        "covid_hoax",
        "pizzagate",
        "plandemic",
        "wuhan_lab",
        "index_1",
        "index_2",
        "retail_and_recreation_percent_change_from_baseline",
        "grocery_and_pharmacy_percent_change_from_baseline",
        "parks_percent_change_from_baseline",
        "transit_stations_percent_change_from_baseline",
        "workplaces_percent_change_from_baseline",
        "residential_percent_change_from_baseline",
    ]
].corr()

Unnamed: 0,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,index_1,index_2,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
covid_conspiracy,1.0,0.253764,-0.120353,0.487821,0.135147,0.7216,0.784104,-0.01255,-0.022287,0.179848,-0.053732,0.069281,-0.004129
covid_hoax,0.253764,1.0,-0.112811,0.317223,0.234428,0.673981,0.691606,0.222925,0.241081,0.29888,0.119585,0.125759,-0.126679
pizzagate,-0.120353,-0.112811,1.0,-0.239771,-0.300099,-0.286249,-0.20842,0.40476,0.307774,-0.000874,0.404453,0.366606,-0.48548
plandemic,0.487821,0.317223,-0.239771,1.0,0.128978,0.724384,0.787842,0.011192,0.214272,0.443996,0.063424,0.05799,-0.038358
wuhan_lab,0.135147,0.234428,-0.300099,0.128978,1.0,0.544144,0.205043,-0.329921,-0.395268,0.020604,-0.454625,-0.403559,0.430357
index_1,0.7216,0.673981,-0.286249,0.724384,0.544144,1.0,0.931693,-0.033854,0.019215,0.354191,-0.113211,-0.046036,0.088378
index_2,0.784104,0.691606,-0.20842,0.787842,0.205043,0.931693,1.0,0.107221,0.199177,0.40852,0.073439,0.126204,-0.088919
retail_and_recreation_percent_change_from_baseline,-0.01255,0.222925,0.40476,0.011192,-0.329921,-0.033854,0.107221,1.0,0.8744,0.622192,0.859456,0.919312,-0.915298
grocery_and_pharmacy_percent_change_from_baseline,-0.022287,0.241081,0.307774,0.214272,-0.395268,0.019215,0.199177,0.8744,1.0,0.686546,0.845084,0.826631,-0.835964
parks_percent_change_from_baseline,0.179848,0.29888,-0.000874,0.443996,0.020604,0.354191,0.40852,0.622192,0.686546,1.0,0.489512,0.537091,-0.47314


In [25]:
us_cross_sectional.columns

Index(['state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
       'commercialScore', 'negativeRegularScore', 'negativeScore',
       'positive

In [26]:
results = smf.ols('non_residential_percent_change_from_baseline ~ index_2 + ContainmentHealthIndex', data=us_cross_sectional).fit()

In [27]:
results.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.318
Model:,OLS,Adj. R-squared:,0.289
Method:,Least Squares,F-statistic:,10.97
Date:,"Fri, 19 Feb 2021",Prob (F-statistic):,0.000123
Time:,17:09:40,Log-Likelihood:,-185.51
No. Observations:,50,AIC:,377.0
Df Residuals:,47,BIC:,382.8
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.1563,11.068,0.466,0.643,-17.110,27.423
index_2,0.3525,0.119,2.960,0.005,0.113,0.592
ContainmentHealthIndex,-0.6122,0.170,-3.592,0.001,-0.955,-0.269

0,1,2,3
Omnibus:,3.329,Durbin-Watson:,2.137
Prob(Omnibus):,0.189,Jarque-Bera (JB):,2.688
Skew:,-0.566,Prob(JB):,0.261
Kurtosis:,3.086,Cond. No.,589.0


In [28]:
results = smf.ols('non_residential_percent_change_from_baseline ~ plandemic + ContainmentHealthIndex', data=us_cross_sectional).fit()

In [29]:
results.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.347
Model:,OLS,Adj. R-squared:,0.32
Method:,Least Squares,F-statistic:,12.51
Date:,"Fri, 19 Feb 2021",Prob (F-statistic):,4.4e-05
Time:,17:09:40,Log-Likelihood:,-184.42
No. Observations:,50,AIC:,374.8
Df Residuals:,47,BIC:,380.6
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.3147,10.942,0.212,0.833,-19.698,24.327
plandemic,0.3187,0.095,3.356,0.002,0.128,0.510
ContainmentHealthIndex,-0.5506,0.168,-3.278,0.002,-0.889,-0.213

0,1,2,3
Omnibus:,2.077,Durbin-Watson:,2.321
Prob(Omnibus):,0.354,Jarque-Bera (JB):,1.825
Skew:,-0.461,Prob(JB):,0.402
Kurtosis:,2.837,Cond. No.,629.0


In [30]:
results = smf.ols('non_residential_percent_change_from_baseline ~ covid_hoax + ContainmentHealthIndex', data=us_cross_sectional).fit()

In [31]:
results.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.303
Model:,OLS,Adj. R-squared:,0.273
Method:,Least Squares,F-statistic:,10.01
Date:,"Fri, 19 Feb 2021",Prob (F-statistic):,0.000246
Time:,17:09:40,Log-Likelihood:,-181.38
No. Observations:,49,AIC:,368.8
Df Residuals:,46,BIC:,374.4
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,15.4988,9.244,1.677,0.100,-3.109,34.106
covid_hoax,0.2459,0.092,2.671,0.010,0.061,0.431
ContainmentHealthIndex,-0.6809,0.171,-3.972,0.000,-1.026,-0.336

0,1,2,3
Omnibus:,2.637,Durbin-Watson:,2.005
Prob(Omnibus):,0.268,Jarque-Bera (JB):,1.656
Skew:,-0.347,Prob(JB):,0.437
Kurtosis:,3.574,Cond. No.,472.0


It looks like popularity of terms like "plandemic" is a good predictor of high mobility, after controlling for containment index.

### Panel regressions: on mobility

#### Pooled OLS

In [None]:
df_onegrp.set_index(["date"]).shift(1)

In [32]:
us_monthly.columns

Index(['state', 'covid_conspiracy', 'covid_hoax', 'pizzagate', 'plandemic',
       'wuhan_lab', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
   

In [33]:
us_monthly[["non_residential_percent_change_from_baseline"]].shift(1)

Unnamed: 0_level_0,non_residential_percent_change_from_baseline
date,Unnamed: 1_level_1
2020-03-31,
2020-03-31,-1.880000
2020-03-31,-7.107692
2020-03-31,-10.207143
2020-03-31,1.300000
...,...
2021-01-31,-26.858065
2021-01-31,-24.741935
2021-01-31,-26.290323
2021-01-31,-18.032258


In [69]:
val = (
    us_monthly[["state", "covid_conspiracy", "covid_hoax", "pizzagate", "plandemic", "wuhan_lab"]]
    .groupby("state")
    .resample("M")
    .asfreq()
).groupby(level=0).shift(1) 

In [68]:
pd.DataFrame(val.to_records(index=False))

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
0,,,,,,
1,Alabama,29.0,10.0,76.0,,33.0
2,Alabama,44.0,37.0,48.0,,54.0
3,Alabama,33.0,14.0,15.0,45.0,44.0
4,Alabama,,51.0,71.0,57.0,29.0
...,...,...,...,...,...,...
556,Wyoming,,,50.0,,
557,Wyoming,,,77.0,,
558,Wyoming,,,100.0,,
559,Wyoming,95.0,,,,


In [83]:
us_monthly = us_monthly.reset_index().set_index(["state", "date"])

In [110]:
us_monthly["StringencyIndex_times_plandemic"] = us_monthly["StringencyIndex"] * us_monthly["plandemic"] 

In [104]:
exog_vars = ["plandemic", "ContainmentHealthIndex", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PooledOLS(us_monthly.non_residential_percent_change_from_baseline, exog)
pooled_res = mod.fit()
print(pooled_res)

                                       PooledOLS Estimation Summary                                       
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2078
Estimator:                                            PooledOLS   R-squared (Between):              0.3019
No. Observations:                                           332   R-squared (Within):               0.0623
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.2078
Time:                                                  18:26:30   Log-likelihood                   -1425.0
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      28.680
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [105]:
exog_vars = ["plandemic", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PooledOLS(us_monthly.non_residential_percent_change_from_baseline, exog)
pooled_res = mod.fit()
print(pooled_res)

                                       PooledOLS Estimation Summary                                       
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2065
Estimator:                                            PooledOLS   R-squared (Between):              0.3083
No. Observations:                                           332   R-squared (Within):               0.0506
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.2065
Time:                                                  18:26:37   Log-likelihood                   -1425.2
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      42.817
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [111]:
exog_vars = ["plandemic", "StringencyIndex", "StringencyIndex_times_plandemic"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PooledOLS(us_monthly.non_residential_percent_change_from_baseline, exog)
pooled_res = mod.fit()
print(pooled_res)

                                       PooledOLS Estimation Summary                                       
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2085
Estimator:                                            PooledOLS   R-squared (Between):              0.3117
No. Observations:                                           332   R-squared (Within):               0.0539
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.2085
Time:                                                  18:31:01   Log-likelihood                   -1424.8
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      28.805
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

#### Fixed effects

In [107]:
exog_vars = ["plandemic", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

                                       PanelOLS Estimation Summary                                        
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.0778
Estimator:                                             PanelOLS   R-squared (Between):              0.2378
No. Observations:                                           332   R-squared (Within):               0.0778
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.1785
Time:                                                  18:26:46   Log-likelihood                   -1312.7
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      11.809
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [114]:
exog_vars = ["plandemic", "StringencyIndex", "StringencyIndex_times_plandemic"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

                                       PanelOLS Estimation Summary                                        
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.0817
Estimator:                                             PanelOLS   R-squared (Between):              0.2427
No. Observations:                                           332   R-squared (Within):               0.0817
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.1799
Time:                                                  18:33:00   Log-likelihood                   -1312.0
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      8.2691
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [121]:
exog_vars = ["plandemic", "StringencyIndex", "StringencyIndex_times_plandemic"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

                                       PanelOLS Estimation Summary                                        
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.0695
Estimator:                                             PanelOLS   R-squared (Between):              0.1268
No. Observations:                                           332   R-squared (Within):               0.0771
Date:                                          Fri, Feb 19 2021   R-squared (Overall):              0.1153
Time:                                                  19:13:51   Log-likelihood                   -1099.4
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      5.0028
Entities:                                                    50   P-value                           0.0007
Avg Obs:                             

### Panel regressions: on mobility

In [122]:
#### Pooled OLS

In [125]:
us_cross_sectional["positivityRate"] = us_cross_sectional["positiveIncrease"] / us_cross_sectional["totalTestResultsIncrease"]

In [126]:
us_cross_sectional.columns

Index(['state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
       'commercialScore', 'negativeRegularScore', 'negativeScore',
       'positive

In [131]:
results = smf.ols('positivityRate ~ plandemic + ContainmentHealthIndex', data=us_cross_sectional).fit()

In [132]:
results.summary()

0,1,2,3
Dep. Variable:,positivityRate,R-squared:,0.486
Model:,OLS,Adj. R-squared:,0.464
Method:,Least Squares,F-statistic:,22.19
Date:,"Fri, 19 Feb 2021",Prob (F-statistic):,1.64e-07
Time:,19:23:46,Log-Likelihood:,88.182
No. Observations:,50,AIC:,-170.4
Df Residuals:,47,BIC:,-164.6
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3729,0.047,7.947,0.000,0.278,0.467
plandemic,-0.0006,0.000,-1.496,0.141,-0.001,0.000
ContainmentHealthIndex,-0.0048,0.001,-6.625,0.000,-0.006,-0.003

0,1,2,3
Omnibus:,16.844,Durbin-Watson:,1.843
Prob(Omnibus):,0.0,Jarque-Bera (JB):,20.713
Skew:,1.244,Prob(JB):,3.18e-05
Kurtosis:,4.937,Cond. No.,629.0


In [137]:
us_cross_sectional[["state", "positivityRate"]].sort_values("positivityRate")


Unnamed: 0,state,positivityRate
45,Vermont,0.014178
11,Hawaii,0.026546
19,Maine,0.027931
8,District of Columbia,0.033275
1,Alaska,0.033995
21,Massachusetts,0.037114
6,Connecticut,0.042621
39,Rhode Island,0.043567
32,New York,0.043604
37,Oregon,0.044116
