# EDA and analyses of Google Trends data

In [1]:
from linearmodels.panel import PooledOLS, PanelOLS

import statsmodels.api as sm
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf

from statsmodels.iolib.summary2 import summary_col

In [2]:
trend_month_state = pd.read_csv("data/cleaned/trend_month_state.csv", index_col=0)
trend_month_DMA = pd.read_csv("data/cleaned/trend_month_DMA.csv", index_col=0)
trend_all_time_state = pd.read_csv("data/cleaned/trend_all_time_state.csv", index_col=0)
trend_all_time_DMA = pd.read_csv("data/cleaned/trend_all_time_DMA.csv", index_col=0)

In [3]:
trend_all_time_state.replace(0, np.nan, inplace = True)
trend_month_state.replace(0, np.nan, inplace = True)

## EDA

In [4]:
trend_all_time_state.head()

Unnamed: 0,date_range,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
0,2020-03-01 2021-01-31,Alabama,52.0,38.0,80.0,57.0,39.0
1,2020-03-01 2021-01-31,Alaska,90.0,94.0,49.0,77.0,46.0
2,2020-03-01 2021-01-31,Arizona,63.0,48.0,79.0,53.0,49.0
3,2020-03-01 2021-01-31,Arkansas,51.0,46.0,79.0,51.0,52.0
4,2020-03-01 2021-01-31,California,54.0,49.0,56.0,58.0,49.0


In [5]:
trend_all_time_state[
    ["covid_conspiracy", "covid_hoax", "pizzagate", "plandemic", "wuhan_lab"]
].corr(method="pearson")

Unnamed: 0,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
covid_conspiracy,1.0,0.253764,-0.120353,0.487821,0.135147
covid_hoax,0.253764,1.0,-0.112811,0.317223,0.234428
pizzagate,-0.120353,-0.112811,1.0,-0.239771,-0.300099
plandemic,0.487821,0.317223,-0.239771,1.0,0.128978
wuhan_lab,0.135147,0.234428,-0.300099,0.128978,1.0


It turns out that popularity of COVID-related conspiracy theories is not strongly correlated with "pizzagate", which is not related to COVID-19 per se. Among all covid-19 related keywords, there is a positive, yet not necessarily strong correlation. 

### Principle component analyses

In [6]:
pca_pipe =  Pipeline([('inputer', SimpleImputer()), ('PCA', PCA(n_components=2))])
principalComponents = pca_pipe.fit_transform(
    trend_all_time_state[["covid_conspiracy", "covid_hoax", "plandemic", "wuhan_lab"]]
)
principalDf = pd.DataFrame(data=principalComponents)

In [7]:
pca_pipe["PCA"].explained_variance_ratio_

array([0.46055991, 0.23254269])

In [8]:
pca_pipe["PCA"].components_

array([[ 0.62681881,  0.47529325,  0.54963197,  0.28124579],
       [-0.43683202,  0.43500537, -0.25854643,  0.74370818]])

## Panel analyses

## Develop an index

### with "Wuhan Lab"

### without "Wuhan Lab"

In [9]:
trend_all_time_state["misinformation_index"] = trend_all_time_state[["covid_hoax", "covid_conspiracy", "plandemic"]].mean(axis = 1)

In [10]:
trend_all_time_state.sort_values("misinformation_index", ascending = False)

Unnamed: 0,date_range,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,misinformation_index
1,2020-03-01 2021-01-31,Alaska,90.0,94.0,49.0,77.0,46.0,87.0
29,2020-03-01 2021-01-31,New Hampshire,92.0,78.0,55.0,65.0,91.0,78.333333
45,2020-03-01 2021-01-31,Vermont,55.0,100.0,48.0,79.0,100.0,78.0
23,2020-03-01 2021-01-31,Minnesota,82.0,50.0,57.0,94.0,52.0,75.333333
34,2020-03-01 2021-01-31,North Dakota,100.0,39.0,54.0,85.0,38.0,74.666667
27,2020-03-01 2021-01-31,Nebraska,78.0,58.0,72.0,81.0,39.0,72.333333
50,2020-03-01 2021-01-31,Wyoming,71.0,,61.0,72.0,32.0,71.5
37,2020-03-01 2021-01-31,Oregon,70.0,65.0,67.0,75.0,43.0,70.0
5,2020-03-01 2021-01-31,Colorado,77.0,67.0,63.0,66.0,35.0,70.0
12,2020-03-01 2021-01-31,Idaho,53.0,89.0,69.0,66.0,42.0,69.333333


## Regression analyses

### Relationship between conspiracy theory popularity and compliance (mobility)

In [11]:
us_daily = pd.read_csv("data/cleaned/daily_df.csv", index_col=0)
us_daily["date"] = pd.to_datetime(us_daily["date"])
us_daily = us_daily.set_index("date")

In [12]:
us_monthly = us_daily.groupby([pd.Grouper(freq='M'), 'state']).mean()
us_monthly

Unnamed: 0_level_0,Unnamed: 1_level_0,positive,probableCases,negative,pending,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,C7_Restrictions on internal movement,C7_Flag,C8_International travel controls,ConfirmedCases,ConfirmedDeaths,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,pop_2019,GDP_2019
date,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-31,Florida,0.000000,,,,2.000000e+00,,,,,,...,0.0,,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,21477737.0,950759.1
2020-01-31,Massachusetts,,,,,2.700000e+00,,,,,,...,0.0,,0.0,0.300000,0.000000,2.220000,1.332000,1.536000,6892503.0,518718.6
2020-01-31,Virginia,,,,,,,,,,,...,0.0,,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,8535519.0,485998.4
2020-01-31,Washington,1.333333,,,,0.000000e+00,,,,,,...,0.0,,0.0,1.000000,0.000000,0.000000,3.394211,3.913684,7614893.0,531224.1
2020-02-29,Florida,0.000000,,,,1.024138e+01,,,,,,...,0.0,,0.0,0.000000,0.000000,0.958621,0.574138,0.663793,21477737.0,950759.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-28,Virginia,533521.941176,108944.411765,,251.411765,5.468171e+06,2196.411765,22329.588235,446.235294,,280.117647,...,0.0,,0.0,533521.941176,6835.823529,53.373529,53.283529,57.631176,8535519.0,485998.4
2021-02-28,Washington,321918.647059,15809.117647,,,4.761709e+06,740.529412,18326.117647,175.411765,,69.176471,...,1.0,0.0,2.0,323048.647059,4541.058824,62.960000,63.220000,61.410000,7614893.0,531224.1
2021-02-28,West Virginia,125328.235294,24920.941176,1.907528e+06,,2.032857e+06,373.470588,,100.058824,,47.000000,...,1.0,1.0,2.0,125328.235294,2144.235294,46.300000,50.110000,57.820000,1792147.0,72161.5
2021-02-28,Wisconsin,602149.941176,51173.529412,2.543102e+06,38.176471,6.377024e+06,531.882353,24978.058824,137.176471,2206.823529,,...,1.0,1.0,0.0,602149.941176,6648.176471,47.690000,54.112000,58.588000,5822434.0,305874.4


In [13]:
us_monthly = pd.DataFrame(us_monthly.to_records())

In [14]:
trend_month_state["date"] = pd.to_datetime(trend_month_state["date_range"].str.slice(start = -10))

In [15]:
trend_month_state = trend_month_state.drop("date_range", axis = 1)

In [16]:
trend_month_state

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,date
0,Alabama,29.0,10.0,76.0,,33.0,2020-03-31
1,Alaska,73.0,80.0,97.0,,100.0,2020-03-31
2,Arizona,34.0,25.0,60.0,,31.0,2020-03-31
3,Arkansas,100.0,,11.0,,34.0,2020-03-31
4,California,41.0,18.0,44.0,18.0,42.0,2020-03-31
...,...,...,...,...,...,...,...
556,Virginia,19.0,14.0,10.0,5.0,14.0,2021-01-31
557,Washington,23.0,4.0,12.0,,52.0,2021-01-31
558,West Virginia,,,35.0,,,2021-01-31
559,Wisconsin,11.0,6.0,45.0,8.0,,2021-01-31


In [17]:
us_monthly = pd.merge(trend_month_state, us_monthly, on = ["state", "date"], how = "left")

In [18]:
us_monthly = us_monthly.set_index("date")

#### Cross-sectional

In [19]:
us_cross_sectional = us_daily.loc[(us_daily.index >= "2020-03-01") & (us_daily.index <= "2021-01-31")].groupby("state").mean()

In [20]:
us_cross_sectional.head()

Unnamed: 0_level_0,positive,probableCases,negative,pending,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,C7_Restrictions on internal movement,C7_Flag,C8_International travel controls,ConfirmedCases,ConfirmedDeaths,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,pop_2019,GDP_2019
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,136385.302115,27217.243902,760604.7,33.0,869817.5,1171.56051,15107.151815,,1471.101351,,...,0.0,,0.0,136602.320242,2154.471299,36.371843,40.989245,43.211027,4903185.0,202940.3
Alaska,13197.732087,,,8.5,456334.8,57.076125,346.443769,,,7.689516,...,1.692771,0.922581,1.933735,13451.298193,64.53012,53.17244,52.371175,52.85488,731545.0,54442.9
American Samoa,0.0,,1204.01,10.5,1147.922,,,,,,...,,,,,,,,,,
Arizona,201980.850299,9544.242553,1006278.0,44.24,1984202.0,1937.880952,16354.311377,516.486395,,337.914966,...,0.221557,1.0,0.221557,201990.272455,4162.688623,42.069641,47.662425,46.335539,7278717.0,321431.9
Arkansas,80232.936747,20588.403974,765162.0,47.588235,836040.6,526.89557,4944.75817,313.140845,43.0,98.476038,...,0.246988,1.0,0.246988,80259.963855,1261.433735,44.011807,44.86997,47.252952,3017804.0,119443.4


In [21]:
trend_all_time_state.drop("date_range", axis = 1)

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,misinformation_index
0,Alabama,52.0,38.0,80.0,57.0,39.0,49.0
1,Alaska,90.0,94.0,49.0,77.0,46.0,87.0
2,Arizona,63.0,48.0,79.0,53.0,49.0,54.666667
3,Arkansas,51.0,46.0,79.0,51.0,52.0,49.333333
4,California,54.0,49.0,56.0,58.0,49.0,53.666667
5,Colorado,77.0,67.0,63.0,66.0,35.0,70.0
6,Connecticut,39.0,49.0,48.0,63.0,64.0,50.333333
7,Delaware,17.0,60.0,78.0,28.0,60.0,35.0
8,District of Columbia,60.0,45.0,28.0,85.0,79.0,63.333333
9,Florida,50.0,39.0,59.0,45.0,44.0,44.666667


In [22]:
us_cross_sectional = pd.merge(us_cross_sectional, trend_all_time_state.drop("date_range", axis = 1), on="state")

In [24]:
us_cross_sectional[
    [
        "covid_conspiracy",
        "covid_hoax",
        "pizzagate",
        "plandemic",
        "wuhan_lab",
        "misinformation_index",
        "retail_and_recreation_percent_change_from_baseline",
        "grocery_and_pharmacy_percent_change_from_baseline",
        "parks_percent_change_from_baseline",
        "transit_stations_percent_change_from_baseline",
        "workplaces_percent_change_from_baseline",
        "residential_percent_change_from_baseline",
    ]
].corr()

Unnamed: 0,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab,misinformation_index,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
covid_conspiracy,1.0,0.253764,-0.120353,0.487821,0.135147,0.784104,-0.010402,-0.019829,0.17851,-0.049109,0.068361,-0.00405
covid_hoax,0.253764,1.0,-0.112811,0.317223,0.234428,0.691606,0.230847,0.243772,0.296104,0.127058,0.134546,-0.135672
pizzagate,-0.120353,-0.112811,1.0,-0.239771,-0.300099,-0.20842,0.401191,0.301877,-0.004714,0.401519,0.370558,-0.483044
plandemic,0.487821,0.317223,-0.239771,1.0,0.128978,0.787842,0.018,0.221099,0.445,0.070676,0.057921,-0.044378
wuhan_lab,0.135147,0.234428,-0.300099,0.128978,1.0,0.205043,-0.321172,-0.389766,0.021916,-0.447594,-0.396473,0.419193
misinformation_index,0.784104,0.691606,-0.20842,0.787842,0.205043,1.0,0.114625,0.204477,0.407142,0.081769,0.129428,-0.095174
retail_and_recreation_percent_change_from_baseline,-0.010402,0.230847,0.401191,0.018,-0.321172,0.114625,1.0,0.875769,0.626384,0.861872,0.919708,-0.915799
grocery_and_pharmacy_percent_change_from_baseline,-0.019829,0.243772,0.301877,0.221099,-0.389766,0.204477,0.875769,1.0,0.690662,0.845603,0.827409,-0.835714
parks_percent_change_from_baseline,0.17851,0.296104,-0.004714,0.445,0.021916,0.407142,0.626384,0.690662,1.0,0.49145,0.534746,-0.475357
transit_stations_percent_change_from_baseline,-0.049109,0.127058,0.401519,0.070676,-0.447594,0.081769,0.861872,0.845603,0.49145,1.0,0.901898,-0.923143


In [25]:
us_cross_sectional.columns

Index(['state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
       'commercialScore', 'negativeRegularScore', 'negativeScore',
       'positive

In [26]:
us_cross_sectional[["state", "non_residential_percent_change_from_baseline"]].sort_values("non_residential_percent_change_from_baseline")

Unnamed: 0,state,non_residential_percent_change_from_baseline
8,District of Columbia,-45.761562
11,Hawaii,-39.502994
4,California,-26.760479
9,Florida,-26.51276
28,Nevada,-24.681682
2,Arizona,-21.84491
32,New York,-21.045833
43,Texas,-18.492537
18,Louisiana,-16.592749
21,Massachusetts,-16.407715


In [34]:
cross_sectional_reg_1 = smf.ols('non_residential_percent_change_from_baseline ~ misinformation_index', data=us_cross_sectional).fit()
cross_sectional_reg_1.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.09
Model:,OLS,Adj. R-squared:,0.071
Method:,Least Squares,F-statistic:,4.827
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.0328
Time:,09:43:41,Log-Likelihood:,-202.52
No. Observations:,51,AIC:,409.0
Df Residuals:,49,BIC:,412.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-24.7055,8.947,-2.761,0.008,-42.685,-6.726
misinformation_index,0.3350,0.153,2.197,0.033,0.029,0.642

0,1,2,3
Omnibus:,12.415,Durbin-Watson:,1.773
Prob(Omnibus):,0.002,Jarque-Bera (JB):,13.896
Skew:,-0.945,Prob(JB):,0.00096
Kurtosis:,4.723,Cond. No.,286.0


In [42]:
cross_sectional_reg_2 = smf.ols('non_residential_percent_change_from_baseline ~ misinformation_index + StringencyIndex', data=us_cross_sectional).fit()
cross_sectional_reg_2.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.372
Model:,OLS,Adj. R-squared:,0.346
Method:,Least Squares,F-statistic:,13.94
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,1.77e-05
Time:,09:56:37,Log-Likelihood:,-184.81
No. Observations:,50,AIC:,375.6
Df Residuals:,47,BIC:,381.3
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.9674,9.838,0.403,0.689,-15.825,23.760
misinformation_index,0.3698,0.117,3.150,0.003,0.134,0.606
StringencyIndex,-0.5931,0.140,-4.238,0.000,-0.875,-0.312

0,1,2,3
Omnibus:,3.227,Durbin-Watson:,2.166
Prob(Omnibus):,0.199,Jarque-Bera (JB):,2.53
Skew:,-0.547,Prob(JB):,0.282
Kurtosis:,3.132,Cond. No.,533.0


In [59]:
cross_sectional_reg_plandemic = smf.ols('non_residential_percent_change_from_baseline ~ plandemic + StringencyIndex', data=us_cross_sectional).fit()
cross_sectional_reg_plandemic.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.397
Model:,OLS,Adj. R-squared:,0.372
Method:,Least Squares,F-statistic:,15.5
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,6.75e-06
Time:,10:09:49,Log-Likelihood:,-183.78
No. Observations:,50,AIC:,373.6
Df Residuals:,47,BIC:,379.3
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.9451,9.658,0.201,0.841,-17.484,21.375
plandemic,0.3284,0.094,3.507,0.001,0.140,0.517
StringencyIndex,-0.5406,0.138,-3.920,0.000,-0.818,-0.263

0,1,2,3
Omnibus:,1.501,Durbin-Watson:,2.404
Prob(Omnibus):,0.472,Jarque-Bera (JB):,1.368
Skew:,-0.39,Prob(JB):,0.505
Kurtosis:,2.779,Cond. No.,564.0


In [60]:
cross_sectional_reg_covid_hoax = smf.ols('non_residential_percent_change_from_baseline ~ covid_hoax + StringencyIndex', data=us_cross_sectional).fit()

In [61]:
cross_sectional_reg_covid_hoax.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.354
Model:,OLS,Adj. R-squared:,0.326
Method:,Least Squares,F-statistic:,12.59
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,4.36e-05
Time:,10:09:53,Log-Likelihood:,-180.88
No. Observations:,49,AIC:,367.8
Df Residuals:,46,BIC:,373.4
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,13.6726,8.057,1.697,0.096,-2.546,29.892
covid_hoax,0.2577,0.091,2.830,0.007,0.074,0.441
StringencyIndex,-0.6409,0.141,-4.534,0.000,-0.925,-0.356

0,1,2,3
Omnibus:,3.505,Durbin-Watson:,2.014
Prob(Omnibus):,0.173,Jarque-Bera (JB):,2.598
Skew:,-0.326,Prob(JB):,0.273
Kurtosis:,3.92,Cond. No.,418.0


In [62]:
cross_sectional_reg_covid_conspiracy = smf.ols('non_residential_percent_change_from_baseline ~ covid_conspiracy + StringencyIndex', data=us_cross_sectional).fit()
cross_sectional_reg_covid_conspiracy.summary()

0,1,2,3
Dep. Variable:,non_residential_percent_change_from_baseline,R-squared:,0.25
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,7.818
Date:,"Sat, 20 Feb 2021",Prob (F-statistic):,0.00117
Time:,10:09:53,Log-Likelihood:,-189.27
No. Observations:,50,AIC:,384.5
Df Residuals:,47,BIC:,390.3
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,20.8203,9.606,2.167,0.035,1.495,40.146
covid_conspiracy,0.0726,0.093,0.783,0.437,-0.114,0.259
StringencyIndex,-0.5883,0.153,-3.841,0.000,-0.896,-0.280

0,1,2,3
Omnibus:,1.391,Durbin-Watson:,2.279
Prob(Omnibus):,0.499,Jarque-Bera (JB):,1.197
Skew:,-0.371,Prob(JB):,0.55
Kurtosis:,2.848,Cond. No.,475.0


In [69]:
print(summary_col([cross_sectional_reg_1,cross_sectional_reg_2],stars=True,float_format='%0.2f').as_latex())

\begin{table}
\caption{}
\label{}
\begin{center}
\begin{tabular}{llllll}
\hline
                      & non\_residential\_percent\_change\_from\_baseline I & non\_residential\_percent\_change\_from\_baseline II & non\_residential\_percent\_change\_from\_baseline III & non\_residential\_percent\_change\_from\_baseline IIII & non\_residential\_percent\_change\_from\_baseline IIIII  \\
\hline
Intercept             & -24.71***                                           & 3.97                                                 & 1.95                                                  & 13.67*                                                 & 20.82**                                                  \\
                      & (8.95)                                              & (9.84)                                               & (9.66)                                                & (8.06)                                                 & (9.61)                                                 

In [76]:
print(summary_col([cross_sectional_reg_plandemic, cross_sectional_reg_covid_hoax, cross_sectional_reg_covid_conspiracy],stars=True,float_format='%0.2f').as_latex())

\begin{table}
\caption{}
\label{}
\begin{center}
\begin{tabular}{llll}
\hline
                  & non\_residential\_percent\_change\_from\_baseline I & non\_residential\_percent\_change\_from\_baseline II & non\_residential\_percent\_change\_from\_baseline III  \\
\hline
Intercept         & 1.95                                                & 13.67*                                               & 20.82**                                                \\
                  & (9.66)                                              & (8.06)                                               & (9.61)                                                 \\
R-squared         & 0.40                                                & 0.35                                                 & 0.25                                                   \\
R-squared Adj.    & 0.37                                                & 0.33                                                 & 0.22                                  

In [72]:
us_cross_sectional["non_residential_percent_change_from_baseline"].std()

13.584944767366649

In [73]:
us_cross_sectional["StringencyIndex"].std()

10.264002730555783

It looks like popularity of terms like "plandemic" is a good predictor of high mobility, after controlling for containment index.

### Panel regressions: on op

#### Pooled OLS

In [74]:
us_monthly.columns

Index(['state', 'covid_conspiracy', 'covid_hoax', 'pizzagate', 'plandemic',
       'wuhan_lab', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResults', 'hospitalizedCurrently', 'hospitalizedCumulative',
       'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently',
       'onVentilatorCumulative', 'recovered', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNeg',
       'dataQualityGrade', 'deathIncrease', 'hospitalizedIncrease',
   

In [77]:
us_monthly[["non_residential_percent_change_from_baseline"]].shift(1)

Unnamed: 0_level_0,non_residential_percent_change_from_baseline
date,Unnamed: 1_level_1
2020-03-31,
2020-03-31,-1.880000
2020-03-31,-7.107692
2020-03-31,-10.207143
2020-03-31,1.300000
...,...
2021-01-31,-26.858065
2021-01-31,-24.741935
2021-01-31,-26.290323
2021-01-31,-18.032258


In [78]:
val = (
    us_monthly[["state", "covid_conspiracy", "covid_hoax", "pizzagate", "plandemic", "wuhan_lab"]]
    .groupby("state")
    .resample("M")
    .asfreq()
).groupby(level=0).shift(1) 

In [79]:
pd.DataFrame(val.to_records(index=False))

Unnamed: 0,state,covid_conspiracy,covid_hoax,pizzagate,plandemic,wuhan_lab
0,,,,,,
1,Alabama,29.0,10.0,76.0,,33.0
2,Alabama,44.0,37.0,48.0,,54.0
3,Alabama,33.0,14.0,15.0,45.0,44.0
4,Alabama,,51.0,71.0,57.0,29.0
...,...,...,...,...,...,...
556,Wyoming,,,50.0,,
557,Wyoming,,,77.0,,
558,Wyoming,,,100.0,,
559,Wyoming,95.0,,,,


In [80]:
us_monthly = us_monthly.reset_index().set_index(["state", "date"])

In [81]:
us_monthly["StringencyIndex_times_plandemic"] = us_monthly["StringencyIndex"] * us_monthly["plandemic"] 

In [83]:
exog_vars = ["plandemic", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PooledOLS(us_monthly.non_residential_percent_change_from_baseline, exog)
pooled_res = mod.fit()
print(pooled_res)

                                       PooledOLS Estimation Summary                                       
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2065
Estimator:                                            PooledOLS   R-squared (Between):              0.3083
No. Observations:                                           332   R-squared (Within):               0.0506
Date:                                          Sat, Feb 20 2021   R-squared (Overall):              0.2065
Time:                                                  10:37:13   Log-likelihood                   -1425.2
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      42.817
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [97]:
pooled_res.summary.as_latex()

'\\begin{center}\n\\begin{tabular}{lclc}\n\\toprule\n\\textbf{Dep. Variable:}                    & non\\_residential\\_percent\\_change\\_from\\_baseline & \\textbf{  R-squared:         }   &      0.2260      \\\\\n\\textbf{Estimator:}                        &                      PanelOLS                     & \\textbf{  R-squared (Between):}  &      0.2856      \\\\\n\\textbf{No. Observations:}                 &                        332                        & \\textbf{  R-squared (Within):}   &      0.0723      \\\\\n\\textbf{Date:}                             &                  Sat, Feb 20 2021                 & \\textbf{  R-squared (Overall):}  &      0.2016      \\\\\n\\textbf{Time:}                             &                      10:44:20                     & \\textbf{  Log-likelihood     }   &     -1340.3      \\\\\n\\textbf{Cov. Estimator:}                   &                     Unadjusted                    & \\textbf{                     }   &                  \\\\\n

In [99]:
us_monthly["plandemic"].std()

23.85524463086154

#### Fixed effects

In [98]:
exog_vars = ["plandemic", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, time_effects=True)
fixed_res = mod.fit()
print(fixed_res)

                                       PanelOLS Estimation Summary                                        
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2206
Estimator:                                             PanelOLS   R-squared (Between):              0.2821
No. Observations:                                           332   R-squared (Within):               0.0686
Date:                                          Sat, Feb 20 2021   R-squared (Overall):              0.2003
Time:                                                  10:48:07   Log-likelihood                   -1341.5
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      45.138
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

In [102]:
fixed_res.summary.as_latex()

'\\begin{center}\n\\begin{tabular}{lclc}\n\\toprule\n\\textbf{Dep. Variable:}    & non\\_residential\\_percent\\_change\\_from\\_baseline & \\textbf{  R-squared:         }   &      0.2206      \\\\\n\\textbf{Estimator:}        &                      PanelOLS                     & \\textbf{  R-squared (Between):}  &      0.2821      \\\\\n\\textbf{No. Observations:} &                        332                        & \\textbf{  R-squared (Within):}   &      0.0686      \\\\\n\\textbf{Date:}             &                  Sat, Feb 20 2021                 & \\textbf{  R-squared (Overall):}  &      0.2003      \\\\\n\\textbf{Time:}             &                      10:48:07                     & \\textbf{  Log-likelihood     }   &     -1341.5      \\\\\n\\textbf{Cov. Estimator:}   &                     Unadjusted                    & \\textbf{                     }   &                  \\\\\n\\textbf{}                  &                                                   & \\textbf{  F-s

In [93]:
exog_vars = ["plandemic", "StringencyIndex", "StringencyIndex_times_plandemic"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

                                       PanelOLS Estimation Summary                                        
Dep. Variable:     non_residential_percent_change_from_baseline   R-squared:                        0.2260
Estimator:                                             PanelOLS   R-squared (Between):              0.2856
No. Observations:                                           332   R-squared (Within):               0.0723
Date:                                          Sat, Feb 20 2021   R-squared (Overall):              0.2016
Time:                                                  10:44:20   Log-likelihood                   -1340.3
Cov. Estimator:                                      Unadjusted                                           
                                                                  F-statistic:                      30.947
Entities:                                                    50   P-value                           0.0000
Avg Obs:                             

#### Panel regression: add lagging

In [None]:
us_monthly["plandemic_l1"] = list(us_monthly[["plandemic"]].reset_index().sort_values("date").set_index("date").groupby("state").shift()["plandemic"]) # Lagging "plandemic" variable

In [None]:
us_monthly["StringencyIndex_times_plandemic_l1"] = us_monthly["StringencyIndex"] * us_monthly["plandemic_l1"] 

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PooledOLS(us_monthly.non_residential_percent_change_from_baseline, exog)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "plandemic", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic", "StringencyIndex_times_plandemic", "plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic", "StringencyIndex_times_plandemic", "plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

In [None]:
exog_vars = ["plandemic", "StringencyIndex_times_plandemic", "plandemic_l1", "StringencyIndex_times_plandemic_l1", "StringencyIndex"]
exog = sm.add_constant(us_monthly[exog_vars])
mod = PanelOLS(us_monthly.non_residential_percent_change_from_baseline, exog, entity_effects=True, time_effects=True)
pooled_res = mod.fit()
print(pooled_res)

### Panel regressions: on outcomes