# Faiz Essa
## Google Trends Event Studies
### March 10, 2023

In [57]:
# Importing packages
import pandas as pd
import os
import math
import linearmodels as lm
import statsmodels.api as sm


The following function creates the lead and lag dummies used in the event studies, for any given event time:

In [58]:
# Generating Functions
def GenerateDummies(df, event_time):
    '''Build the event-study regressors for one event and return the indexed panel.

    The function works on a copy of ``df``, so the caller's frame is never
    modified and repeated calls on the same panel (one per event) cannot
    contaminate each other.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel that contains at least the columns "time", "WhatsAppInterest"
        and "GAULADM1Code".
    event_time : int
        Value of the "time" column at which the event takes place.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` indexed by ("GAULADM1Code", "time") with these added columns:
        "weeks_to_event", "PreEventWindow", "PostEventWindow", one dummy
        "weeks_to_event_<k>.0" per week k of the [-8, 8] window present in the
        data, and the interactions with "WhatsAppInterest": "lead9" (pre-window),
        "lead8".."lead2", "lag0".."lag8" and "lag9" (post-window).
        Week -1 gets no interaction term (omitted reference period).

    Raises
    ------
    KeyError
        If a week of the event window (other than -1) has no rows in ``df``,
        because its dummy column is then never created.
    '''
    # Work on a copy: writing the helper columns onto the caller's frame would
    # let a "window" value left behind by an earlier call survive into this one.
    df = df.copy()

    # generate a variable that details time to event
    df["weeks_to_event"] = df["time"] - event_time
    weeks = df["weeks_to_event"]

    # creating PreEventWindow and PostEventWindow variables (1.0 / 0.0)
    df["PreEventWindow"] = (weeks < -8).astype(float)
    df["PostEventWindow"] = (weeks > 8).astype(float)

    # Week offset inside the event window, NaN outside of it. Every row is
    # assigned, and the float dtype keeps the dummy names in the
    # "weeks_to_event_-8.0" form regardless of whether NaNs are present.
    df["window"] = weeks.where(weeks.between(-8, 8)).astype(float)

    # adding in dummies (rows with NaN "window" get all-zero dummies)
    df = pd.get_dummies(df, prefix="weeks_to_event", columns=["window"])

    # Interacting dummies with WhatsAppInterest
    interest = df["WhatsAppInterest"]

    # leads: lead9 is the whole pre-window period, lead8..lead2 are single weeks
    df["lead9"] = df["PreEventWindow"] * interest
    for k in range(8, 1, -1):
        df[f"lead{k}"] = df[f"weeks_to_event_{float(-k)}"] * interest

    # lags: lag0..lag8 are single weeks, lag9 is the whole post-window period
    for k in range(0, 9):
        df[f"lag{k}"] = df[f"weeks_to_event_{float(k)}"] * interest
    df["lag9"] = df["PostEventWindow"] * interest

    # set index (State is 'Entity' and year-week is 'time')
    df = df.set_index(["GAULADM1Code", "time"])

    return df

In [59]:
# Move into the project repository so the relative data paths used later resolve
project_root = '/Users/faizessa/Library/CloudStorage/Dropbox/Mac/Documents/GitHub/WhatsApp-and-Collective-Action'
os.chdir(project_root)

The raw data is formatted as a panel detailing the number of protests in each Indian state for every week from 2015 through the first week of 2023 (see the preview below).

We also have data on WhatsApp interest from Google Trends for each Indian state. The WhatsApp data comes from *[here](https://trends.google.com/trends/explore?date=2017-07-09%202018-07-09&geo=IN&q=%2Fm%2F0gwzvs1&hl=en)*.

In [60]:
# Load the state-by-week protest panel (path is relative to the working directory)
panel_path = "Data/GDELT/gtrends_es_panel.csv"
protest_panel = pd.read_csv(panel_path)

# preview of data:
print(protest_panel)

      GAULADM1Code  WhatsAppInterest  year  week  time  protest_count
0             IN25               100  2015     1     1              0
1             IN25               100  2015     2     2              0
2             IN25               100  2015     3     3              0
3             IN25               100  2015     4     4              5
4             IN25               100  2015     5     5              0
...            ...               ...   ...   ...   ...            ...
13170         IN30                43  2022    50   421              0
13171         IN30                43  2022    51   422              1
13172         IN30                43  2022    52   423              0
13173         IN30                43  2022    53   424              0
13174         IN30                43  2023     1   425              0

[13175 rows x 6 columns]


In [61]:
# Creates dataframes for each event
#
# Every event gets its own fresh copy of the raw panel, so helper columns
# written while preparing one event can never leak into the next one.

# Labelling forwarded messages (10th July 2018, time = 187)
forward_label_data = GenerateDummies(protest_panel.copy(), 187)

# WhatsApp forwards can be sent to only 5 chats at a time in India and 20 chats everywhere else (18th July 2018, time = 188)
# note this is just one week following the previous change
forward_limit_1_data = GenerateDummies(protest_panel.copy(), 188)

# WhatsApp forwards limited to 5 chats worldwide (21 Jan 2019, time = 216)
forward_limit_2_data = GenerateDummies(protest_panel.copy(), 216)

# WhatsApp launched a fact-check service to combat fake news (2nd Apr 2019, time = 226)
# Privacy settings to control who has permission to add people to groups (3rd Apr 2019, time = 226)
# these two changes are at the same time
fact_check_data = GenerateDummies(protest_panel.copy(), 226)


# WhatsApp messages that have already been forwarded by 5 or more people can only be forwarded to 1 chat (7 Apr 2020, time = 280)
# roughly one year (54 panel weeks) after the policies above
forward_limit_3_data = GenerateDummies(protest_panel.copy(), 280)




I employ the following OLS regression to study the effects of WhatsApp policy changes:

$\mathrm{Protests}_{st} = \alpha + \beta_{-9} (\mathrm{PreEventWindow}_t \times \mathrm{WhatsAppInterest}_{s}) + \sum_{i \in [-8, 8]\setminus \{-1\}} \beta_i (i\_\mathrm{WeeksToEvent}_{t} \times \mathrm{WhatsAppInterest}_{s}) + \beta_{9} (\mathrm{PostEventWindow}_t \times \mathrm{WhatsAppInterest}_{s}) + \gamma_{s} + \eta_{t} + \varepsilon_{st}$

$s$ indexes state and $t$ indexes year-week groups. 

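Note that the uninteracted terms do not appear in the regression: the event-time dummies (e.g. PreEventWindow) vary only over time and are absorbed by the time fixed effects $\eta_t$, while WhatsAppInterest varies only across states and is absorbed by the state fixed effects $\gamma_s$.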

The interaction terms will be labeled as "lags" and "leads" in the code and regression tables (see function def above)

In [63]:
# Defining Variables in Regression

# RHS variables are our leads and lags: lead9..lead2 followed by lag0..lag9
# (there is no lead1, i.e. week -1 has no regressor)
exog_vars = [f"lead{k}" for k in range(9, 1, -1)] + [f"lag{k}" for k in range(10)]


def _fit_event_study(event_data):
    """Return (model, fit) for one event's prepared panel.

    The model is a PanelOLS of protest_count on a constant plus the lead/lag
    interactions, with entity and time effects; the fit uses a clustered
    covariance with clustering by entity.
    """
    regressors = sm.add_constant(event_data[exog_vars])
    model = lm.PanelOLS(
        event_data["protest_count"],
        regressors,
        entity_effects=True,
        time_effects=True,
    )
    fitted = model.fit(cov_type='clustered', cluster_entity=True)
    return model, fitted


# Executing regressions

# Labelling forwarded messages (10th July 2018, time = 187)
forward_label_mod, forward_label_fit = _fit_event_study(forward_label_data)

# WhatsApp forwards can be sent to only 5 chats at a time in India and 20 chats everywhere else (18th July 2018, time = 188)
# note this is just one week following the previous change
forward_limit_1_mod, forward_limit_1_fit = _fit_event_study(forward_limit_1_data)

# WhatsApp forwards limited to 5 chats worldwide (21 Jan 2019, time = 216)
forward_limit_2_mod, forward_limit_2_fit = _fit_event_study(forward_limit_2_data)

# WhatsApp launched a fact-check service to combat fake news (2nd Apr 2019, time = 226)
# Privacy settings to control who has permission to add people to groups (3rd Apr 2019, time = 226)
# these two changes are at the same time
fact_check_mod, fact_check_fit = _fit_event_study(fact_check_data)

# WhatsApp messages that have already been forwarded by 5 or more people can only be forwarded to 1 chat (7 Apr 2020, time = 280)
forward_limit_3_mod, forward_limit_3_fit = _fit_event_study(forward_limit_3_data)






0,1,2,3
Dep. Variable:,protest_count,R-squared:,0.0045
Estimator:,PanelOLS,R-squared (Between):,0.0163
No. Observations:,13175,R-squared (Within):,-0.0319
Date:,"Fri, Mar 10 2023",R-squared (Overall):,-0.0122
Time:,15:15:18,Log-likelihood,-6.84e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,3.2132
Entities:,31,P-value,0.0000
Avg Obs:,425.00,Distribution:,"F(18,12702)"
Min Obs:,425.00,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,18.889,7.0973,2.6615,0.0078,4.9777,32.801
lead9,0.2848,0.1275,2.2346,0.0255,0.0350,0.5346
lead8,-0.0601,0.1837,-0.3273,0.7435,-0.4202,0.3000
lead7,2.1963,2.0012,1.0975,0.2724,-1.7263,6.1189
lead6,0.5502,0.3767,1.4608,0.1441,-0.1881,1.2885
lead5,0.1184,0.2169,0.5457,0.5852,-0.3068,0.5435
lead4,-0.0535,0.1782,-0.3004,0.7639,-0.4029,0.2958
lead3,0.3933,0.5237,0.7511,0.4526,-0.6332,1.4198
lead2,0.2344,0.1983,1.1816,0.2374,-0.1544,0.6231
