<a href="https://colab.research.google.com/github/Udzf/Israel-Palestine/blob/main/Econ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and load packages

In [1]:
!pip install linearmodels
!pip install stargazer

Collecting linearmodels
  Downloading linearmodels-6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.1.1-py3-none-any.whl.metadata (6.9 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmodels)
  Downloading setuptools_scm-8.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Downloading linearmodels-6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS
from stargazer.stargazer import Stargazer

# Data preparation

## Load data

In [3]:
# Load data from github

link_replication_file1 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file1.csv'
link_replication_file2 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file2.csv'
link_replication_file3 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file3.csv'

replication_file1 = pd.read_csv(link_replication_file1)
replication_file2 = pd.read_csv(link_replication_file2)
replication_file3 = pd.read_csv(link_replication_file3)

# Replication

## Table 1

### Stata code

In [4]:
''' STATA CODE
************************************************************************
** Table 1. News Pressure and the Length of Conflict-related News
************************************************************************

use "$dta/replication_file1.dta", clear

* Panel A: Full sample

cap drop sample_deriv
xi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow, cluster (monthyear)
gen sample_deriv=1 if e(sample)

reg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1 , vce(cluster monthyear)
test  high_intensity
test  high_intensity
scalar F_cont=r(F)
outreg2 using "$tables/table_1a.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow  , cluster (monthyear)
outreg2 using "$tables/table_1a.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi_nc (length_conflict_news = high_intensity) i.month i.year i.dow , cluster (monthyear)
outreg2 using "$tables/table_1a.xls", append ctitle("Uncorr NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

* Panel A: Sample of days with an attack on the same day or the previous day

cap drop sample_deriv
xi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster (monthyear)
gen sample_deriv=1 if e(sample)

reg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1& (occurrence_t_y==1 | occurrence_pal_t_y ==1) , vce(cluster monthyear)
test  high_intensity
scalar F_cont=r(F)
outreg2 using "$tables/table_1b.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1) , cluster (monthyear)
outreg2 using "$tables/table_1b.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons label bdec(5) addstat ("F excl. instr.", F_cont) dec(3)

xi: ivreg daily_woi_nc (length_conflict_news = high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster (monthyear)
outreg2 using "$tables/table_1b.xls", append ctitle("Uncorr NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)
'''

' STATA CODE\n************************************************************************\n** Table 1. News Pressure and the Length of Conflict-related News\n************************************************************************\n\nuse "$dta/replication_file1.dta", clear\n\n* Panel A: Full sample\n\ncap drop sample_deriv\nxi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow, cluster (monthyear)\ngen sample_deriv=1 if e(sample)\n\nreg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1 , vce(cluster monthyear)\ntest  high_intensity\ntest  high_intensity\nscalar F_cont=r(F)\noutreg2 using "$tables/table_1a.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)\n\nxi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow  , cluster (monthyear)\noutreg2 using "$tables/table_1a.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons

### Python code

#### Table 1: A. Full Sample

##### Creating models

In [5]:
# Data table 1
# Drop missing values from the relevant columns
data_table_1 = replication_file1.copy()
relevant_columns = ['daily_woi', 'daily_woi_nc', 'length_conflict_news', 'high_intensity', 'month', 'year', 'dow', 'monthyear']

df = data_table_1[relevant_columns].dropna()

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf


# Define the first-stage formula
first_stage_formula = 'length_conflict_news ~ high_intensity + C(month) + C(year) + C(dow)'

# Run the first-stage regression
first_stage = smf.ols(first_stage_formula, data=df).fit()

# Use the predicted values from the first stage
df['length_conflict_news_pred'] = first_stage.predict(df)

# Define the second-stage formula (corrected)
second_stage_formula_corrected = 'daily_woi ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_corrected = smf.ols(second_stage_formula_corrected, data=df).fit()


# Define the second-stage formula
second_stage_formula_uncorrected = 'daily_woi_nc ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_uncorrected = smf.ols(second_stage_formula_uncorrected, data=df).fit()


##### Displaying Table 1: A

In [6]:
# @title
stargazer = Stargazer([first_stage, second_stage_corrected, second_stage_uncorrected])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['Intercept', 'high_intensity', 'length_conflict_news_pred'])
stargazer.custom_columns(["Length of Conflict News [2SLS 1st stage]", "Corrected News Pressure [2SLS 2nd stage]", "Uncorrected News Pressure [2SLS 2nd stage]"], [1, 1, 1])

stargazer

0,1,2,3
,,,
,,,
,Length of Conflict News [2SLS 1st stage],Corrected News Pressure [2SLS 2nd stage],Uncorrected News Pressure [2SLS 2nd stage]
,(1),(2),(3)
,,,
Intercept,-2.594***,1.150***,1.135***
,(0.348),(0.034),(0.035)
high_intensity,5.046***,,
,(0.180),,
length_conflict_news_pred,,0.002,-0.017***


#### Table 1: B. Sample of Days with an Attack on the Same Day or the Previous Day

##### Creating models

In [7]:
# Data table 1
# Drop missing values from the relevant columns
data_table_1 = replication_file1.copy()
relevant_columns = ['daily_woi', 'daily_woi_nc', 'length_conflict_news', 'high_intensity', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
df = data_table_1[relevant_columns].dropna()

# Restrict sample to only include certain observations
df = df[(df['occurrence_t_y'] == 1) | (df['occurrence_pal_t_y'] == 1)].copy()

# Define the first-stage formula
first_stage_formula = 'length_conflict_news ~ high_intensity + C(month) + C(year) + C(dow)'

# Run the first-stage regression
first_stage = smf.ols(first_stage_formula, data=df).fit()

# Use the predicted values from the first stage
df['length_conflict_news_pred'] = first_stage.predict(df)

# Define the second-stage formula (corrected)
second_stage_formula_corrected = 'daily_woi ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_corrected = smf.ols(second_stage_formula_corrected, data=df).fit()


# Define the second-stage formula
second_stage_formula_uncorrected = 'daily_woi_nc ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_uncorrected = smf.ols(second_stage_formula_uncorrected, data=df).fit()


##### Displaying Table 1:B

In [8]:
# @title
stargazer = Stargazer([first_stage, second_stage_corrected, second_stage_uncorrected])

# Customize the output as needed
stargazer.covariate_order(['Intercept', 'high_intensity', 'length_conflict_news_pred'])
stargazer.title("Regression Results")
stargazer.custom_columns(["Length of Conflict News [2SLS 1st stage]", "Corrected News Pressure [2SLS 2nd stage]", "Uncorrected News Pressure [2SLS 2nd stage]"], [1, 1, 1])

stargazer

0,1,2,3
,,,
,,,
,Length of Conflict News [2SLS 1st stage],Corrected News Pressure [2SLS 2nd stage],Uncorrected News Pressure [2SLS 2nd stage]
,(1),(2),(3)
,,,
Intercept,-2.870***,1.141***,1.129***
,(0.494),(0.039),(0.040)
high_intensity,5.291***,,
,(0.247),,
length_conflict_news_pred,,0.000,-0.018***


## Table 2

### Stata code

In [9]:
'''
************************************************************************
** Table 2. Coverage of Conflict, News Pressure, and Google Searches
************************************************************************

use "$dta/replication_file1.dta", clear

eststo: xi: reg any_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , cluster(monthyear)
outreg2 using "$tables/table_2.xls", replace ctitle("Isr-Pal on news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)

eststo: xi: nbreg length_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , vce(cluster monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)

eststo: xi: reg any_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster(monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Isr-Pal on news") keep(lnvic_t_y lnvic_pal_y daily_woi) nocons label bdec(3)

eststo: xi: nbreg length_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), vce(cluster monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(lnvic_t_y lnvic_pal_y daily_woi) nocons label bdec(3)

xi: newey conflict_searches lnvic_t_y lnvic_pal_y monthyear i.month i.year i.dow  if length_conflict_news_t_t_1!=., lag(7) force
outreg2 using "$tables/table_2.xls", append stats(coef se) keep(lnvic_t_y lnvic_pal_y) nocons label bdec(3)

xi: newey conflict_searches lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1  monthyear i.month i.year i.dow  , lag(7) force
outreg2 using "$tables/table_2.xls", append stats(coef se) keep(lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1) nocons label bdec(3) sdec(3)

* Corresponding OLS regressions estimated below to display R-squared
eststo: xi: reg conflict_searches lnvic_t_y lnvic_pal_y monthyear i.month i.year i.dow if length_conflict_news_t_t_1!=., vce(cluster monthyear)
eststo: xi: reg conflict_searches lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1 monthyear i.month i.year i.dow , vce(cluster monthyear)
esttab, se pr2 r2 star(* 0.1 ** 0.05 *** 0.01)
'''

'\n************************************************************************\n** Table 2. Coverage of Conflict, News Pressure, and Google Searches\n************************************************************************\n\nuse "$dta/replication_file1.dta", clear\n\neststo: xi: reg any_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , cluster(monthyear)\noutreg2 using "$tables/table_2.xls", replace ctitle("Isr-Pal on news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)\n\neststo: xi: nbreg length_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , vce(cluster monthyear)\noutreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)\n\neststo: xi: reg any_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster(monthyear)\noutreg2 using "$tables/table_2.xls", append ctitle("Isr-Pal on news") ke

### Python code

##### Creating the models

In [10]:
data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()


data = data_table_2.copy()


# First column
# Adding interaction terms for categorical variables like i.month, i.year, and i.dow
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Defining the regression formula
formula = "any_conflict_news ~ occurrence_t_y + occurrence_pal_t_y + C(month) + C(year) + C(dow)"

# Running the regression with clustering on 'monthyear'
model_1 = smf.ols(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['monthyear']})

In [11]:
# Second column
# Specify that the month, year, and dow variables are categorical
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Define the regression formula
formula = "length_conflict_news ~ occurrence_t_y + occurrence_pal_t_y + C(month) + C(year) + C(dow)"

# Fit the negative binomial regression model
model_2 = smf.negativebinomial(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['monthyear']})

  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


         Current function value: 0.758654
         Iterations: 35
         Function evaluations: 41
         Gradient evaluations: 41


In [12]:
# Third column

data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'daily_woi', 'lnvic_pal_y', 'lnvic_t_y', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[(data['occurrence_t_y'] == 1) | (data['occurrence_pal_t_y'] == 1)].copy()

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "any_conflict_news ~ lnvic_t_y + lnvic_pal_y + daily_woi + C(month) + C(year) + C(dow)"

# Fit the linear regression model with clustered standard errors
model_3 = smf.ols(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

In [13]:
data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'daily_woi', 'lnvic_pal_y', 'lnvic_t_y', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[(data['occurrence_t_y'] == 1) | (data['occurrence_pal_t_y'] == 1)].copy()

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "length_conflict_news ~ lnvic_t_y + lnvic_pal_y + daily_woi + C(month) + C(year) + C(dow)"

# Fit the negative binomial regression model with clustered standard errors
model_4 = smf.negativebinomial(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


         Current function value: 1.011429
         Iterations: 35
         Function evaluations: 64
         Gradient evaluations: 64


In [14]:
# Column 5

data_table_2 = replication_file1.copy()
relevant_columns = ['length_conflict_news_t_t_1', 'conflict_searches', 'lnvic_pal_y', 'lnvic_t_y',  'month', 'year', 'dow', 'monthyear' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[data['length_conflict_news_t_t_1'].notnull()]

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "conflict_searches ~ lnvic_t_y + lnvic_pal_y + monthyear + C(month) + C(year) + C(dow)"

# Fit the linear regression model with HAC standard errors (lag=7)
model_5 = smf.ols(formula, data=filtered_data).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [15]:
# Column 6

# Ensure categorical variables for month, year, and day of the week
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Define the regression formula
formula = "conflict_searches ~ lnvic_t_y + lnvic_pal_y + length_conflict_news_t_t_1 + monthyear + C(month) + C(year) + C(dow)"

# Fit the linear regression model with HAC standard errors (lag=7)
model_6 = smf.ols(formula, data=data).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

#### Displaying Table 2

In [16]:
# @title
# @ ##### Table 2
stargazer = Stargazer([model_1, model_2, model_3, model_4, model_5, model_6])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['occurrence_t_y', 'occurrence_pal_t_y', 'daily_woi', 'lnvic_t_y', 'lnvic_pal_y', 'length_conflict_news_t_t_1'])
stargazer.custom_columns(["Any news (OLS) - all days - 2000-11", "Length news (bin) - all days - 2000-11", "Any news (OLS) - t/t-1 - 2000-11", "Length news (bin) - t/t-1 - 2000-11", "Searches (OLS) - all days - 2004-11", "Searches (OLS) - all days - 2004-11"], [1, 1, 1, 1 ,1, 1])

stargazer

0,1,2,3,4,5,6
,,,,,,
,,,,,,
,Any news (OLS) - all days - 2000-11,Length news (bin) - all days - 2000-11,Any news (OLS) - t/t-1 - 2000-11,Length news (bin) - t/t-1 - 2000-11,Searches (OLS) - all days - 2004-11,Searches (OLS) - all days - 2004-11
,(1),(2),(3),(4),(5),(6)
,,,,,,
occurrence_t_y,0.100***,0.985***,,,,
,(0.020),(0.183),,,,
occurrence_pal_t_y,0.112***,0.689***,,,,
,(0.032),(0.139),,,,
daily_woi,,,-0.078*,-0.661***,,


## Table 3

### Stata code

In [17]:
'''

*************************************************************************
** Table 3. Israeli Attacks and News Pressure
************************************************************************

use "$dta/replication_file1.dta", clear

* Panel A: News Pressure

sort date

xi: reg occurrence daily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)
outreg2 using "$tables/table_3a.xls", replace ctitle("Occurrence") keep(daily_woi) nocons label bdec(3)

xi: newey occurrence daily_woi leaddaily_woi i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using  "$tables/table_3a.xls", append ctitle("Occurrence") keep(daily_woi leaddaily_woi lagdaily_woi occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: newey occurrence daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using  "$tables/table_3a.xls", append ctitle("Occurrence") keep(daily_woi leaddaily_woi lagdaily_woi occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: reg lnvic daily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)
outreg2 using  "$tables/table_3a.xls", append ctitle("Ln(victims)") keep(daily_woi) nocons label bdec(3)

xi: newey lnvic daily_woi leaddaily_woi  i.month i.year i.dow if gaza_war==0,lag(7) force
outreg2 using  "$tables/table_3a.xls", append ctitle("Ln(victims)") keep(daily_woi leaddaily_woi) nocons label bdec(3)

xi: newey lnvic daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using  "$tables/table_3a.xls", append ctitle("Ln(victims)") keep(daily_woi leaddaily_woi lagdaily_woi occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: glm victims_isr daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, family(nbinom ml) vce(hac nwest 7)
outreg2 using  "$tables/table_3a.xls", append ctitle("Num. victims") keep(daily_woi leaddaily_woi lagdaily_woi occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

* Corresponding OLS regressions estimated below to display (pseudo) R-squared
eststo clear
eststo: xi: reg occurrence daily_woi i.month i.year i.dow if gaza_war==0 , cluster(monthyear)
eststo: xi: reg occurrence daily_woi leaddaily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg occurrence daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi leaddaily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: nbreg victims_isr daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, vce(cluster monthyear)
esttab, se r2 pr2 star(* 0.10 ** 0.05 *** 0.01)

* Panel B: Uncorrected news pressure

sort date

xi: reg occurrence daily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
outreg2 using  "$tables/table_3b.xls", replace ctitle("Occurrence") keep(daily_woi_nc) nocons label bdec(3)

xi: newey occurrence daily_woi_nc leaddaily_woi_nc  i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using  "$tables/table_3b.xls", append ctitle("Occurrence") keep(daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: newey occurrence daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using "$tables/table_3b.xls", append ctitle("Occurrence") keep(daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: reg lnvic daily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
outreg2 using "$tables/table_3b.xls", append ctitle("Ln(victims)") keep(daily_woi_nc) nocons label bdec(3)

xi: newey lnvic daily_woi_nc leaddaily_woi_nc i.month i.year i.dow if gaza_war==0,lag(7) force
outreg2 using "$tables/table_3b.xls", append ctitle("Ln(victims)") keep(daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: newey lnvic daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, lag(7) force
outreg2 using "$tables/table_3b.xls", append ctitle("Ln(victims)") keep(daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

xi: glm victims_isr daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, family(nbinom ml) vce(hac nwest 7)
outreg2 using "$tables/table_3b.xls", append ctitle("Num. victims") keep(daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)

* Corresponding OLS regressions estimated below to display (pseudo) R-squared
eststo clear
eststo: xi: reg occurrence daily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg occurrence daily_woi_nc leaddaily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg occurrence daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi_nc leaddaily_woi_nc i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: xi: reg lnvic daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, cluster(monthyear)
eststo: nbreg victims_isr daily_woi_nc leaddaily_woi_nc lagdaily_woi_nc-lagdaily_woi7_nc occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, cluster(monthyear)
esttab, se r2 pr2 star(* 0.10 ** 0.05 *** 0.01) compress

'''

'\n\n*************************************************************************\n** Table 3. Israeli Attacks and News Pressure\n************************************************************************\n\nuse "$dta/replication_file1.dta", clear\n\n* Panel A: News Pressure\n\nsort date\n\nxi: reg occurrence daily_woi i.month i.year i.dow if gaza_war==0, cluster(monthyear)\noutreg2 using "$tables/table_3a.xls", replace ctitle("Occurrence") keep(daily_woi) nocons label bdec(3)\n\nxi: newey occurrence daily_woi leaddaily_woi i.month i.year i.dow if gaza_war==0, lag(7) force\noutreg2 using  "$tables/table_3a.xls", append ctitle("Occurrence") keep(daily_woi leaddaily_woi lagdaily_woi occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14) nocons label bdec(3)\n\nxi: newey occurrence daily_woi leaddaily_woi lagdaily_woi-lagdaily_woi7 occurrence_pal_1 occurrence_pal_2_7 occurrence_pal_8_14 i.month i.year i.dow if gaza_war==0, lag(7) force\noutreg2 using  "$tables/table_3a.xls", append ctitle("O

### Python

#### Creating models (corrected news pressure)

In [18]:
# Column 1 models
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_3 = data_table_3[relevant_columns].dropna()

filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

formula = "occurrence ~ daily_woi + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_1 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [19]:
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war', 'leaddaily_woi']
data_table_3 = data_table_3[relevant_columns].dropna()

filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

# Define the regression formula
formula = "occurrence ~ daily_woi + leaddaily_woi + C(month) + C(year) + C(dow)"

# Fit the model using OLS
model_2 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})  # Newey-West with 7 lags

In [20]:
# Select relevant columns and drop missing values
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi', 'leaddaily_woi', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi']

data_table_3 = data_table_3[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi{lag}'] = filtered_df['daily_woi'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("occurrence ~ daily_woi + leaddaily_woi + lagdaily_woi1 + lagdaily_woi2 + "
           "lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Fit the model using heteroskedasticity and autocorrelation-consistent (HAC) standard errors
model_3 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [21]:
# Filter and select relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_3 = data_table_3[relevant_columns].dropna()

# Filter the data based on the condition gaza_war == 0
filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

# Define the formula for the regression
formula = "lnvic ~ daily_woi + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_4 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [22]:
# Copy and filter relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic', 'daily_woi', 'leaddaily_woi', 'month', 'year', 'dow', 'gaza_war']
data_table = data_table_3[relevant_columns].dropna()

# Filter data where gaza_war == 0
filtered_df = data_table[data_table['gaza_war'] == 0]

# Define the formula for regression
formula = "lnvic ~ daily_woi + leaddaily_woi + C(month) + C(year) + C(dow)"

# Prepare the design matrices for sm.OLS
model_5 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [23]:
# Copy and filter relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic','occurrence', 'daily_woi', 'leaddaily_woi', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi']

data_table_3 = data_table_3[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi{lag}'] = filtered_df['daily_woi'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("lnvic ~ daily_woi + leaddaily_woi + lagdaily_woi1 + lagdaily_woi2 + "
           "lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Prepare the design matrices for sm.OLS
model_6 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [24]:
import pandas as pd
import statsmodels.api as sm

# Filter relevant columns
relevant_columns = [
    'victims_isr', 'occurrence', 'daily_woi', 'leaddaily_woi',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'lagdaily_woi'
]

data_table_3 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

# Drop rows with NaN due to lagging
df_filtered.dropna(inplace=True)

# Define the formula for the model
formula = (
    "victims_isr ~ daily_woi + leaddaily_woi + "
    "lagdaily_woi + lagdaily_woi1 + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow) "
)

# Fit the Negative Binomial model
model_7 = sm.NegativeBinomial.from_formula(formula, data=df_filtered).fit()

         Current function value: 1.319268
         Iterations: 35
         Function evaluations: 38
         Gradient evaluations: 38


  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


In [25]:
# Column 1 models
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi_nc', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_3 = data_table_3[relevant_columns].dropna()

filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

formula = "occurrence ~ daily_woi_nc + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_1_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [26]:
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi_nc', 'month', 'year', 'dow', 'monthyear', 'gaza_war', 'leaddaily_woi_nc']
data_table_3 = data_table_3[relevant_columns].dropna()

filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

# Define the regression formula
formula = "occurrence ~ daily_woi_nc + leaddaily_woi_nc + C(month) + C(year) + C(dow)"

# Fit the model using OLS
model_2_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})  # Newey-West with 7 lags

In [27]:
# Select relevant columns and drop missing values
data_table_3 = replication_file1.copy()
relevant_columns = ['occurrence', 'daily_woi_nc', 'leaddaily_woi_nc', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi']

data_table_3 = data_table_3[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi_nc{lag}'] = filtered_df['daily_woi_nc'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("occurrence ~ daily_woi_nc + leaddaily_woi_nc + lagdaily_woi_nc1 + lagdaily_woi_nc2 + "
           "lagdaily_woi_nc3 + lagdaily_woi_nc4 + lagdaily_woi_nc5 + lagdaily_woi_nc6 + lagdaily_woi_nc7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Fit the model using heteroskedasticity and autocorrelation-consistent (HAC) standard errors
model_3_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [28]:
# Filter and select relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic', 'daily_woi_nc', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_3 = data_table_3[relevant_columns].dropna()

# Filter the data based on the condition gaza_war == 0
filtered_df = data_table_3[data_table_3['gaza_war'] == 0]

# Define the formula for the regression
formula = "lnvic ~ daily_woi_nc + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_4_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [29]:
# Copy and filter relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic', 'daily_woi_nc', 'leaddaily_woi_nc', 'month', 'year', 'dow', 'gaza_war']
data_table = data_table_3[relevant_columns].dropna()

# Filter data where gaza_war == 0
filtered_df = data_table[data_table['gaza_war'] == 0]

# Define the formula for regression
formula = "lnvic ~ daily_woi_nc + leaddaily_woi_nc + C(month) + C(year) + C(dow)"

# Prepare the design matrices for sm.OLS
model_5_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [30]:
# Copy and filter relevant columns
data_table_3 = replication_file1.copy()
relevant_columns = ['lnvic','occurrence', 'daily_woi_nc', 'leaddaily_woi_nc', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi_nc']

data_table_3 = data_table_3[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi_nc{lag}'] = filtered_df['daily_woi_nc'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("lnvic ~ daily_woi_nc + leaddaily_woi_nc + lagdaily_woi_nc1 + lagdaily_woi_nc2 + "
           "lagdaily_woi_nc3 + lagdaily_woi_nc4 + lagdaily_woi_nc5 + lagdaily_woi_nc6 + lagdaily_woi_nc7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Prepare the design matrices for sm.OLS
model_6_nc = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [31]:
# Filter relevant columns
relevant_columns = [
    'victims_isr', 'occurrence', 'daily_woi_nc', 'leaddaily_woi_nc',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'lagdaily_woi_nc'
]

data_table_3 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_3[data_table_3['gaza_war'] == 0].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi_nc{lag}'] = df_filtered['daily_woi_nc'].shift(lag)

# Drop rows with NaN due to lagging
df_filtered.dropna(inplace=True)

print(df_filtered.dtypes)

# Define the formula for the model
formula = (
    "victims_isr ~ daily_woi_nc + leaddaily_woi_nc + "
    "lagdaily_woi_nc + lagdaily_woi_nc1 + lagdaily_woi_nc2 + lagdaily_woi_nc3 + "
    "lagdaily_woi_nc4 + lagdaily_woi_nc5 + lagdaily_woi_nc6 + lagdaily_woi_nc7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow) "
)

# Fit the Negative Binomial model
model_7_nc = sm.NegativeBinomial.from_formula(formula, data=df_filtered).fit()

victims_isr             float64
occurrence              float64
daily_woi_nc            float64
leaddaily_woi_nc        float64
occurrence_pal_1        float64
occurrence_pal_2_7      float64
occurrence_pal_8_14     float64
month                  category
year                   category
dow                    category
monthyear               float64
gaza_war                float64
lagdaily_woi_nc         float64
lagdaily_woi_nc1        float64
lagdaily_woi_nc2        float64
lagdaily_woi_nc3        float64
lagdaily_woi_nc4        float64
lagdaily_woi_nc5        float64
lagdaily_woi_nc6        float64
lagdaily_woi_nc7        float64
dtype: object
         Current function value: 1.320865
         Iterations: 35
         Function evaluations: 39
         Gradient evaluations: 39


  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


#### Dispalying Table 3

In [32]:
# @title ##### Regression Table 3 (corrected news pressure)
stargazer = Stargazer([model_1, model_2, model_3, model_4, model_5, model_6, model_7])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['daily_woi', 'leaddaily_woi', 'lagdaily_woi1', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["1", "2", "3", "4", "5", "6", "7"], [1, 1, 1, 1 ,1, 1, 1])

stargazer

0,1,2,3,4,5,6,7
,,,,,,,
,,,,,,,
,1,2,3,4,5,6,7
,(1),(2),(3),(4),(5),(6),(7)
,,,,,,,
daily_woi,0.074**,0.030,0.026,0.130**,0.058,0.028,0.045
,(0.032),(0.034),(0.035),(0.052),(0.050),(0.046),(0.163)
leaddaily_woi,,0.084**,0.078**,,0.138***,0.121**,0.471***
,,(0.033),(0.035),,(0.047),(0.048),(0.152)
lagdaily_woi1,,,-0.028,,,-0.037,-3.629


In [33]:
# @title ##### Regression Table 3 (uncorrected news pressure)
stargazer = Stargazer([model_1_nc, model_2_nc, model_3_nc, model_4_nc, model_5_nc, model_6_nc, model_7_nc])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['daily_woi_nc', 'leaddaily_woi_nc', 'lagdaily_woi_nc1', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["1", "2", "3", "4", "5", "6", "7"], [1, 1, 1, 1 ,1, 1, 1])

stargazer

0,1,2,3,4,5,6,7
,,,,,,,
,,,,,,,
,1,2,3,4,5,6,7
,(1),(2),(3),(4),(5),(6),(7)
,,,,,,,
daily_woi_nc,0.027,-0.007,-0.002,0.021,-0.023,-0.036,-0.195
,(0.034),(0.034),(0.036),(0.065),(0.053),(0.049),(0.167)
leaddaily_woi_nc,,0.063*,0.064*,,0.080*,0.076,0.326**
,,(0.034),(0.035),,(0.048),(0.048),(0.154)
lagdaily_woi_nc1,,,-0.031,,,-0.032,-4.954


## Table 4

### Python code

#### Creating the models

In [50]:
# Column 1 models
data_table_4 = replication_file1.copy()
relevant_columns = ['occurrence_pal', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_4 = data_table_4[relevant_columns].dropna()

filtered_df = data_table_4[data_table_4['gaza_war'] == 0]

formula = "occurrence_pal ~ daily_woi + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_1 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [51]:
data_table_4 = replication_file1.copy()
relevant_columns = ['occurrence_pal', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war', 'leaddaily_woi']
data_table_4 = data_table_4[relevant_columns].dropna()

filtered_df = data_table_4[data_table_4['gaza_war'] == 0]

# Define the regression formula
formula = "occurrence_pal ~ daily_woi + leaddaily_woi + C(month) + C(year) + C(dow)"

# Fit the model using OLS
model_2 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})  # Newey-West with 7 lags

In [52]:
# Select relevant columns and drop missing values
data_table_4 = replication_file1.copy()
relevant_columns = ['occurrence_pal', 'daily_woi', 'leaddaily_woi', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi']

data_table_4 = data_table_4[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi{lag}'] = filtered_df['daily_woi'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("occurrence_pal ~ daily_woi + leaddaily_woi + lagdaily_woi1 + lagdaily_woi2 + "
           "lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Fit the model using heteroskedasticity and autocorrelation-consistent (HAC) standard errors
model_3 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [53]:
# Filter and select relevant columns
data_table_4 = replication_file1.copy()
relevant_columns = ['lnvic_pal', 'daily_woi', 'month', 'year', 'dow', 'monthyear', 'gaza_war']
data_table_4 = data_table_4[relevant_columns].dropna()

# Filter the data based on the condition gaza_war == 0
filtered_df = data_table_4[data_table_4['gaza_war'] == 0]

# Define the formula for the regression
formula = "lnvic_pal ~ daily_woi + C(month) + C(year) + C(dow)"

# Fit the regression model with clustered standard errors
model_4 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='cluster', cov_kwds={'groups': filtered_df['monthyear']})

In [54]:
# Copy and filter relevant columns
data_table_4 = replication_file1.copy()
relevant_columns = ['lnvic_pal', 'daily_woi', 'leaddaily_woi', 'month', 'year', 'dow', 'gaza_war']
data_table = data_table_4[relevant_columns].dropna()

# Filter data where gaza_war == 0
filtered_df = data_table[data_table['gaza_war'] == 0]

# Define the formula for regression
formula = "lnvic_pal ~ daily_woi + leaddaily_woi + C(month) + C(year) + C(dow)"

# Prepare the design matrices for sm.OLS
model_5 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [55]:
# Copy and filter relevant columns
data_table_4 = replication_file1.copy()
relevant_columns = ['lnvic_pal','occurrence', 'daily_woi', 'leaddaily_woi', "occurrence_pal_1",
                    "occurrence_pal_2_7", "occurrence_pal_8_14", 'month', 'year',
                    'dow', 'monthyear', 'gaza_war', 'lagdaily_woi']

data_table_4 = data_table_4[relevant_columns]

# Filter rows where gaza_war == 0 and create a copy
filtered_df = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    filtered_df[f'lagdaily_woi{lag}'] = filtered_df['daily_woi'].shift(lag)

# Drop rows with NaN values after creating lagged variables
filtered_df = filtered_df.dropna()

# Define the formula for the regression
formula = ("lnvic_pal ~ daily_woi + leaddaily_woi + lagdaily_woi1 + lagdaily_woi2 + "
           "lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
           "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
           "C(month) + C(year) + C(dow)")

# Prepare the design matrices for sm.OLS
model_6 = smf.ols(formula=formula, data=filtered_df).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [56]:
import pandas as pd
import statsmodels.api as sm

# Filter relevant columns
relevant_columns = [
    'victims_pal', 'occurrence', 'daily_woi', 'leaddaily_woi',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'lagdaily_woi'
]

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

# Drop rows with NaN due to lagging
df_filtered.dropna(inplace=True)

# Define the formula for the model
formula = (
    "victims_pal ~ daily_woi + leaddaily_woi + "
    "lagdaily_woi + lagdaily_woi1 + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow) "
)

# Fit the Negative Binomial model
model_7 = sm.NegativeBinomial.from_formula(formula, data=df_filtered).fit()

         Current function value: 0.309785
         Iterations: 35
         Function evaluations: 38
         Gradient evaluations: 38


  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


#### Displaying Table 4

In [57]:
# @title ##### Regression Table 3 (corrected news pressure)
stargazer = Stargazer([model_1, model_2, model_3, model_4, model_5, model_6, model_7])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['daily_woi', 'leaddaily_woi', 'lagdaily_woi1', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["1", "2", "3", "4", "5", "6", "7"], [1, 1, 1, 1 ,1, 1, 1])

stargazer

0,1,2,3,4,5,6,7
,,,,,,,
,,,,,,,
,1,2,3,4,5,6,7
,(1),(2),(3),(4),(5),(6),(7)
,,,,,,,
daily_woi,-0.008,-0.015,-0.003,-0.014,-0.023,-0.015,-0.327
,(0.013),(0.015),(0.017),(0.016),(0.018),(0.021),(0.429)
leaddaily_woi,,0.013,0.019,,0.017,0.021,0.270
,,(0.018),(0.018),,(0.019),(0.019),(0.415)
lagdaily_woi1,,,-0.023,,,-0.027,21.510


## Table 5

### Stata code

### Python code

#### Creating the models

In [58]:
# Filter relevant columns
relevant_columns = ['leaddaily_woi', 'lead_maj_events',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'gaza_war', 'monthyear'
]

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
filtered_data = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Drop rows with NaN due to lagging
filtered_data.dropna(inplace=True)

# Define the formula for the regression
formula = "leaddaily_woi ~ lead_maj_events + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + C(month) + C(year) + C(dow)"



# Perform the regression with clustered standard errors
model_1 = smf.ols(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

In [59]:
# Column 2
relevant_columns = ['leaddaily_woi_nc', 'lead_maj_events',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'gaza_war', 'monthyear']

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
filtered_data = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Drop rows with NaN due to lagging
filtered_data.dropna(inplace=True)

# Define the formula for the regression
formula = "leaddaily_woi_nc ~ lead_maj_events + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + C(month) + C(year) + C(dow)"



# Perform the regression with clustered standard errors
model_2 = smf.ols(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

In [60]:
from linearmodels.iv import IV2SLS
import pandas as pd

# Column 2
relevant_columns = ['leaddaily_woi', 'occurrence', 'lead_maj_events',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'gaza_war', 'monthyear'
]

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
filtered_data = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Drop rows with NaN due to lagging
filtered_data.dropna(inplace=True)


# Define dependent variable (y)
y = filtered_data['occurrence']

# Define endogenous variable (leaddaily_woi)
endog = filtered_data['leaddaily_woi']

# Define instrument for the endogenous variable (lead_maj_events)
instrument = filtered_data['lead_maj_events']

# Define exogenous variables (including categorical dummies for month, year, and day of the week)
exog = pd.concat([
    filtered_data[['occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14']],
    pd.get_dummies(filtered_data['month'], prefix='month', drop_first=True),
    pd.get_dummies(filtered_data['year'], prefix='year', drop_first=True),
    pd.get_dummies(filtered_data['dow'], prefix='dow', drop_first=True)
], axis=1)

# Perform the IV regression (2SLS)
model_3 = IV2SLS(y, exog, endog, instrument).fit(cov_type='clustered', clusters=filtered_data['monthyear'])

In [61]:
# Column 4
from linearmodels.iv import IV2SLS
import pandas as pd

# Column 2
relevant_columns = ['leaddaily_woi_nc', 'occurrence', 'lead_maj_events',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'gaza_war', 'monthyear'
]

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
filtered_data = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Drop rows with NaN due to lagging
filtered_data.dropna(inplace=True)


# Define dependent variable (y)
y = filtered_data['occurrence']

# Define endogenous variable (leaddaily_woi)
endog = filtered_data['leaddaily_woi_nc']

# Define instrument for the endogenous variable (lead_maj_events)
instrument = filtered_data['lead_maj_events']

# Define exogenous variables (including categorical dummies for month, year, and day of the week)
exog = pd.concat([
    filtered_data[['occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14']],
    pd.get_dummies(filtered_data['month'], prefix='month', drop_first=True),
    pd.get_dummies(filtered_data['year'], prefix='year', drop_first=True),
    pd.get_dummies(filtered_data['dow'], prefix='dow', drop_first=True)
], axis=1)

# Perform the IV regression (2SLS)
model_4 = IV2SLS(y, exog, endog, instrument).fit(cov_type='clustered', clusters=filtered_data['monthyear'])

In [62]:
# Column 5
from linearmodels.iv import IV2SLS
import pandas as pd

# Column 2
relevant_columns = ['leaddaily_woi_nc', 'occurrence', 'lead_maj_events',
    "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'gaza_war', 'monthyear'
]

data_table_4 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
filtered_data = data_table_4[data_table_4['gaza_war'] == 0].copy()

# Drop rows with NaN due to lagging
filtered_data.dropna(inplace=True)

formula = """
occurrence ~ lead_maj_events + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14
+ C(month) + C(year) + C(dow)
"""

# Perform the regression with clustered standard errors
model_5 = smf.ols(formula, data=filtered_data).fit(
    cov_type='cluster',
    cov_kwds={'groups': filtered_data['monthyear']}
)

#### Displaying Table 5

In [63]:
# @title ##### Regression Table 5
stargazer = Stargazer([model_1, model_2, model_3, model_4, model_5])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['lead_maj_events', 'leaddaily_woi', 'leaddaily_woi_nc'])
# Modify column names with LaTeX formatting and ensure proper rendering
stargazer.custom_columns(["P_t+1 [2SLS 1st stage]", "Uncorrected P_t+1 [2SLS 1st stage]", "Occurence [2SLS 2nd stage]", "Occurence [2SLS 2nd stage]", "Occurence [OLS reduced]"], [1, 1, 1, 1 ,1])

stargazer


0,1,2,3,4,5
,,,,,
,,,,,
,P_t+1 [2SLS 1st stage],Uncorrected P_t+1 [2SLS 1st stage],Occurence [2SLS 2nd stage],Occurence [2SLS 2nd stage],Occurence [OLS reduced]
,(1),(2),(3),(4),(5)
,,,,,
lead_maj_events,0.177***,0.190***,,,0.109***
,(0.035),(0.034),,,(0.041)
leaddaily_woi,,,0.609***,,
,,,(0.233),,
leaddaily_woi_nc,,,,0.572***,


## Table 6

In [64]:
# Filter relevant columns
relevant_columns = [
    'attacks_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df = df_filtered.copy()

df.dropna(inplace=True)

print(df['attacks_target'].unique())

df['attacks_target'] = df['attacks_target'].astype('category')
df['attacks_target'] = df['attacks_target'].cat.reorder_categories([1, 2,  3], ordered=True)  # Adjust as needed

print(df['attacks_target'].unique())

# Independent variables
independent_vars = [
    'leaddaily_woi',
    'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14',
    'lagdaily_woi', 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4',
    'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7'
]

# Add dummy variables for categorical predictors
dummy_vars = pd.get_dummies(df[['month', 'year', 'dow']], drop_first=True)

X = pd.concat([df[independent_vars], dummy_vars], axis=1)

# Add a constant for the intercept
X = sm.add_constant(X)

X = X.astype(float)
#X = X.apply(pd.to_numeric, errors='coerce')
#X = X.astype({col: 'int' for col in X.select_dtypes(include=['bool']).columns})


# Define the dependent variable
y = df['attacks_target']

# Fit the multinomial logit model with clustering
model = MNLogit(y, X)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df['monthyear']})

# Summary of the results
print(result.summary())

[3. 1. 2.]
[3, 1, 2]
Categories (3, int64): [1 < 2 < 3]


NameError: name 'MNLogit' is not defined

In [None]:
# Filter relevant columns
relevant_columns = [
    'attacks_fatal', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df = df_filtered.copy()

df.dropna(inplace=True)

df['attacks_fatal'] = df['attacks_fatal'].astype('category')
df['attacks_fatal'] = df['attacks_fatal'].cat.reorder_categories([1, 2,  3], ordered=True)  # Adjust as needed

# Independent variables
independent_vars = [
    'leaddaily_woi',
    'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14',
    'lagdaily_woi', 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4',
    'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7'
]

# Add dummy variables for categorical predictors
dummy_vars = pd.get_dummies(df[['month', 'year', 'dow']], drop_first=True)

X = pd.concat([df[independent_vars], dummy_vars], axis=1)

# Add a constant for the intercept
X = sm.add_constant(X)

X = X.astype(int)

# Define the dependent variable
y = df['attacks_fatal']

# Fit the multinomial logit model with clustering
model = MNLogit(y, X)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df['monthyear']})

# Summary of the results
print(result.summary())

In [None]:
# Filter relevant columns
relevant_columns = [
    'attacks_hpd', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df = df_filtered.copy()

df.dropna(inplace=True)

df['attacks_hpd'] = df['attacks_hpd'].astype('category')
df['attacks_hpd'] = df['attacks_hpd'].cat.reorder_categories([1, 2,  3], ordered=True)  # Adjust as needed

# Independent variables
independent_vars = [
    'leaddaily_woi',
    'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14',
    'lagdaily_woi', 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4',
    'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7'
]

# Add dummy variables for categorical predictors
dummy_vars = pd.get_dummies(df[['month', 'year', 'dow']], drop_first=True)

X = pd.concat([df[independent_vars], dummy_vars], axis=1)

# Add a constant for the intercept
X = sm.add_constant(X)

X = X.astype(int)

# Define the dependent variable
y = df['attacks_hpd']

# Fit the multinomial logit model with clustering
model = MNLogit(y, X)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df['monthyear']})

# Summary of the results
print(result.summary())

In [None]:
# Filter relevant columns
relevant_columns = [
    'attacks_hw', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df = df_filtered.copy()

df.dropna(inplace=True)

df['attacks_hw'] = df['attacks_hw'].astype('category')
df['attacks_hw'] = df['attacks_hw'].cat.reorder_categories([1, 2,  3], ordered=True)  # Adjust as needed

# Independent variables
independent_vars = [
    'leaddaily_woi',
    'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14',
    'lagdaily_woi', 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4',
    'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7'
]

# Add dummy variables for categorical predictors
dummy_vars = pd.get_dummies(df[['month', 'year', 'dow']], drop_first=True)

X = pd.concat([df[independent_vars], dummy_vars], axis=1)

# Add a constant for the intercept
X = sm.add_constant(X)

X = X.astype(float)

# Define the dependent variable
y = df['attacks_hw']

# Fit the multinomial logit model with clustering
model = MNLogit(y, X)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df['monthyear']})

# Summary of the results
print(result.summary())

In [None]:
# Filter relevant columns
relevant_columns = [
    'victims_non_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df_filtered.dropna(inplace=True)

# Define your formula
formula = 'victims_non_target ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + C(month) + C(year) + C(dow)'

# Fit the GLM model with negative binomial family
model = sm.GLM.from_formula(formula, data=df_filtered, family=sm.families.NegativeBinomial()).fit(cov_type='HC0')


# Print the model summary
print(model.summary())

### Python code

In [None]:
import statsmodels.api as sm
import pandas as pd

# Filter relevant columns
relevant_columns = [
    'attacks_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

df_filtered = replication_file1[relevant_columns].copy()


# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df_filtered.dropna(inplace=True)

# Filter rows where gaza_war == 0
df_filtered = df_filtered[df_filtered['gaza_war'] == 0].copy()

df_filtered['attacks_target'] = df_filtered['attacks_target'].astype('category')

print(df_filtered['attacks_target'].cat.categories)

# Change the base category for the 'attacks_target' variable
df_filtered['attacks_target'] = df_filtered['attacks_target'].cat.reorder_categories(
    [1.0, 2.0, 3.0], ordered=True
)

# Automatically handle the categorical variables
df_filtered['month'] = df_filtered['month'].cat.codes
df_filtered['year'] = df_filtered['year'].cat.codes
df_filtered['dow'] = df_filtered['dow'].cat.codes

# Define the formula for multinomial logit regression
formula = ('attacks_target ~ leaddaily_woi + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + '
           'lagdaily_woi1 + lagdaily_woi2 + lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + '
           'C(month) + C(year) + C(dow)')

model = smf.mnlogit(formula, data=df_filtered)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df_filtered['monthyear']})

# Print the summary of the model
print(result.summary())

In [None]:
import statsmodels.api as sm
import pandas as pd
import statsmodels.formula.api as smf

# Filter relevant columns
relevant_columns = [
    'attacks_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi', 'lagdaily_woi'
]

df_filtered = replication_file1[relevant_columns].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df_filtered.dropna(inplace=True)

# Filter rows where gaza_war == 0
df_filtered = df_filtered[df_filtered['gaza_war'] == 0].copy()

# Ensure 'attacks_target' is a categorical variable
df_filtered['attacks_target'] = df_filtered['attacks_target'].astype('category')

# Reorder categories and set the base category (e.g., '1.0' as the base)
df_filtered['attacks_target'] = df_filtered['attacks_target'].cat.reorder_categories(
    [1.0, 2.0, 3.0], ordered=True
)

# Convert the 'attacks_target' to numeric codes
df_filtered['attacks_target'] = df_filtered['attacks_target'].cat.codes

# Automatically handle the categorical variables
df_filtered['month'] = df_filtered['month'].cat.codes
df_filtered['year'] = df_filtered['year'].cat.codes
df_filtered['dow'] = df_filtered['dow'].cat.codes

# Define the formula for multinomial logit regression
formula = ('attacks_target ~ leaddaily_woi + occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + '
           'lagdaily_woi1 + lagdaily_woi2 + lagdaily_woi3 + lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + '
           'C(month) + C(year) + C(dow)')

# Fit the multinomial logit model
model = smf.mnlogit(formula, data=df_filtered)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df_filtered['monthyear']})

# Print the summary of the model
print(result.summary())


In [None]:
import statsmodels.stats.outliers_influence as smi
from statsmodels.tools.tools import add_constant

X = df_filtered[['leaddaily_woi', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14', 'lagdaily_woi1',
                 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4', 'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7',
                 'month', 'year', 'dow']]
X = add_constant(X)  # Add constant term for VIF calculation

vif_data = pd.DataFrame()
vif_data["Variable"] = X.columns
vif_data["VIF"] = [smi.variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

print(vif_data)


In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import MNLogit
from statsmodels.stats.moment_helpers import cov2corr

# Filter relevant columns
relevant_columns = [
    'attacks_fatal', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Ensure the categorical variables are correctly formatted
for col in ['month', 'year', 'dow']:
    df_filtered[col] = df_filtered[col].astype(str).astype('category')

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)

df = df_filtered.copy()

df = pd.get_dummies(df, columns=['month', 'year', 'dow'], drop_first=True)

# Define independent variables
independent_vars = [
    'leaddaily_woi',
    'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14',
    'lagdaily_woi', 'lagdaily_woi2', 'lagdaily_woi3', 'lagdaily_woi4',
    'lagdaily_woi5', 'lagdaily_woi6', 'lagdaily_woi7'
] + [col for col in df.columns if col.startswith(('month_', 'year_', 'dow_'))]

df.dropna(inplace=True)

# Add a constant
X = sm.add_constant(df[independent_vars])
X = X.astype(float)

# Define dependent variable
y = df['attacks_fatal']  # Ensure this is encoded as integers

model = MNLogit(y, X)
result = model.fit(cov_type='cluster', cov_kwds={'groups': df['monthyear']})

# Print the summary
print(result.summary())

#### Creating models

In [None]:
# Column 2A

# Filter relevant columns
relevant_columns = [
    'victims_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_target ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg2A = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg2A.summary())

In [None]:
# Column3A

# Filter relevant columns
relevant_columns = [
    'victims_non_target', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_non_target ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg3A = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg3A.summary())

In [None]:
# Column 2B

# Filter relevant columns
relevant_columns = [
    'non_fatal_victims', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'occurrence_fatal',  'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[(data_table_6['gaza_war'] == 0) & (data_table_6['occurrence_fatal'] == 0)].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag).copy()

data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "non_fatal_victims ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg2B = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg2B.summary())

In [None]:
# Column 3B

# Filter relevant columns
relevant_columns = [
    'fatal_victims', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "fatal_victims ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg3B = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg3B.summary())

In [None]:
# Column 3B

# Filter relevant columns
relevant_columns = [
    'victims_lpd', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_lpd ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg2C = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg2C.summary())

In [None]:
# Column 3B

# Filter relevant columns
relevant_columns = [
    'victims_hpd', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_hpd ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg3C = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg3C.summary())

In [None]:
# Column 3B

# Filter relevant columns
relevant_columns = [
    'victims_nhw', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'occurrence_hw', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[(data_table_6['gaza_war'] == 0) & (data_table_6['occurrence_hw'] == 0)].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_nhw ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg2D = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg2D.summary())

In [None]:
# Column 3B

# Filter relevant columns
relevant_columns = [
    'victims_hw', "occurrence_pal_1", "occurrence_pal_2_7", "occurrence_pal_8_14",
    'month', 'year', 'dow', 'monthyear', 'gaza_war', 'daily_woi' , 'leaddaily_woi' , 'lagdaily_woi'
]

data_table_6 = replication_file1[relevant_columns].copy()

# Filter rows where gaza_war == 0
df_filtered = data_table_6[data_table_6['gaza_war'] == 0].copy()

# Add lagged variables
for lag in range(1, 8):  # Lag from 1 to 7
    df_filtered[f'lagdaily_woi{lag}'] = df_filtered['daily_woi'].shift(lag)
data_filtered = df_filtered.copy()


data_filtered.dropna(inplace=True)

# Define the formula using patsy-style syntax
formula = (
    "victims_hw ~ leaddaily_woi + lagdaily_woi + lagdaily_woi2 + lagdaily_woi3 + "
    "lagdaily_woi4 + lagdaily_woi5 + lagdaily_woi6 + lagdaily_woi7 + "
    "occurrence_pal_1 + occurrence_pal_2_7 + occurrence_pal_8_14 + "
    "C(month) + C(year) + C(dow)"
)

# Fit the GLM with a negative binomial family
model_mlneg3D = smf.glm(formula=formula, data=data_filtered, family=sm.families.NegativeBinomial(alpha=1)).fit()

# Print the summary with HAC (Newey-West) standard errors
print(model_mlneg3D.summary())

### Displaying table 6

In [None]:
# @title ##### Regression Table 6 (corrected news pressure)
stargazer = Stargazer([model_mlneg2A, model_mlneg3A])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['leaddaily_woi', 'lagdaily_woi', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["ML neg (2A)", "ML neg (3A)"], [1,1])

stargazer

In [None]:
# @title ##### Regression Table 6 (corrected news pressure)
stargazer = Stargazer([model_mlneg2B, model_mlneg3B])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['leaddaily_woi', 'lagdaily_woi', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["ML neg (2B)", "ML neg (3B)"], [1,1])

stargazer

In [None]:
# @title ##### Regression Table 6 (corrected news pressure)
stargazer = Stargazer([model_mlneg2C, model_mlneg3C])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['leaddaily_woi', 'lagdaily_woi', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["ML neg (2C)", "ML neg (3C)"], [1,1])

stargazer

In [None]:
# @title ##### Regression Table 6 (corrected news pressure)
stargazer = Stargazer([model_mlneg2D, model_mlneg3D])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['leaddaily_woi', 'lagdaily_woi', 'occurrence_pal_1', 'occurrence_pal_2_7', 'occurrence_pal_8_14'])
stargazer.custom_columns(["ML neg (2D)", "ML neg (3D)"], [1,1])

stargazer

# Extension

## Data exploration