<a href="https://colab.research.google.com/github/Udzf/Israel-Palestine/blob/main/Econ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and load packages

In [109]:
!pip install linearmodels
!pip install stargazer



In [110]:
import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS
from stargazer.stargazer import Stargazer

# Data preparation

## Load data

In [111]:
# Load data from github

link_replication_file1 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file1.csv'
link_replication_file2 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file2.csv'
link_replication_file3 = 'https://raw.githubusercontent.com/Udzf/Israel-Palestine/refs/heads/main/replication_file3.csv'

replication_file1 = pd.read_csv(link_replication_file1)
replication_file2 = pd.read_csv(link_replication_file2)
replication_file3 = pd.read_csv(link_replication_file3)

# Replication

## Table 1

### Stata code

In [112]:
''' STATA CODE
************************************************************************
** Table 1. News Pressure and the Length of Conflict-related News
************************************************************************

use "$dta/replication_file1.dta", clear

* Panel A: Full sample

cap drop sample_deriv
xi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow, cluster (monthyear)
gen sample_deriv=1 if e(sample)

reg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1 , vce(cluster monthyear)
test  high_intensity
test  high_intensity
scalar F_cont=r(F)
outreg2 using "$tables/table_1a.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow  , cluster (monthyear)
outreg2 using "$tables/table_1a.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi_nc (length_conflict_news = high_intensity) i.month i.year i.dow , cluster (monthyear)
outreg2 using "$tables/table_1a.xls", append ctitle("Uncorr NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

* Panel A: Sample of days with an attack on the same day or the previous day

cap drop sample_deriv
xi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster (monthyear)
gen sample_deriv=1 if e(sample)

reg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1& (occurrence_t_y==1 | occurrence_pal_t_y ==1) , vce(cluster monthyear)
test  high_intensity
scalar F_cont=r(F)
outreg2 using "$tables/table_1b.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)

xi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1) , cluster (monthyear)
outreg2 using "$tables/table_1b.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons label bdec(5) addstat ("F excl. instr.", F_cont) dec(3)

xi: ivreg daily_woi_nc (length_conflict_news = high_intensity) i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster (monthyear)
outreg2 using "$tables/table_1b.xls", append ctitle("Uncorr NP, 2SLS") keep(length_conflict_news) nocons label bdec(3) addstat ("F excl. instr.", F_cont)
'''

' STATA CODE\n************************************************************************\n** Table 1. News Pressure and the Length of Conflict-related News\n************************************************************************\n\nuse "$dta/replication_file1.dta", clear\n\n* Panel A: Full sample\n\ncap drop sample_deriv\nxi: ivreg daily_woi (length_conflict_news=high_intensity) i.month i.year i.dow, cluster (monthyear)\ngen sample_deriv=1 if e(sample)\n\nreg length_conflict_news high_intensity i.month i.year i.dow  if sample_deriv==1 , vce(cluster monthyear)\ntest  high_intensity\ntest  high_intensity\nscalar F_cont=r(F)\noutreg2 using "$tables/table_1a.xls", replace ctitle("Length conflict news, 1st stage") keep(high_intensity) nocons label bdec(3) addstat ("F excl. instr.", F_cont)\n\nxi: ivreg daily_woi (length_conflict_news = high_intensity) i.month i.year i.dow  , cluster (monthyear)\noutreg2 using "$tables/table_1a.xls", append ctitle("NP, 2SLS") keep(length_conflict_news) nocons

### Python code

#### Table 1: A. Full Sample

##### Creating models

In [113]:
# Data table 1
# Drop missing values from the relevant columns
data_table_1 = replication_file1.copy()
relevant_columns = ['daily_woi', 'daily_woi_nc', 'length_conflict_news', 'high_intensity', 'month', 'year', 'dow', 'monthyear']

df = data_table_1[relevant_columns].dropna()

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf


# Define the first-stage formula
first_stage_formula = 'length_conflict_news ~ high_intensity + C(month) + C(year) + C(dow)'

# Run the first-stage regression
first_stage = smf.ols(first_stage_formula, data=df).fit()

# Use the predicted values from the first stage
df['length_conflict_news_pred'] = first_stage.predict(df)

# Define the second-stage formula (corrected)
second_stage_formula_corrected = 'daily_woi ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_corrected = smf.ols(second_stage_formula_corrected, data=df).fit()


# Define the second-stage formula
second_stage_formula_uncorrected = 'daily_woi_nc ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_uncorrected = smf.ols(second_stage_formula_uncorrected, data=df).fit()


##### Displaying Table 1: A

In [142]:
stargazer = Stargazer([first_stage, second_stage_corrected, second_stage_uncorrected])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['Intercept', 'high_intensity', 'length_conflict_news_pred'])
stargazer.custom_columns(["Length of Conflict News [2SLS 1st stage]", "Corrected News Pressure [2SLS 2nd stage]", "Uncorrected News Pressure [2SLS 2nd stage]"], [1, 1, 1])

stargazer

0,1,2,3
,,,
,,,
,Length of Conflict News [2SLS 1st stage],Corrected News Pressure [2SLS 2nd stage],Uncorrected News Pressure [2SLS 2nd stage]
,(1),(2),(3)
,,,
Intercept,-2.870***,1.141***,1.129***
,(0.494),(0.039),(0.040)
high_intensity,5.291***,,
,(0.247),,
length_conflict_news_pred,,0.000,-0.018***


#### Table 1: B. Sample of Days with an Attack on the Same Day or the Previous Day

##### Creating models

In [115]:
# Data table 1
# Drop missing values from the relevant columns
data_table_1 = replication_file1.copy()
relevant_columns = ['daily_woi', 'daily_woi_nc', 'length_conflict_news', 'high_intensity', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
df = data_table_1[relevant_columns].dropna()

# Restrict sample to only include certain observations
df = df[(df['occurrence_t_y'] == 1) | (df['occurrence_pal_t_y'] == 1)].copy()

# Define the first-stage formula
first_stage_formula = 'length_conflict_news ~ high_intensity + C(month) + C(year) + C(dow)'

# Run the first-stage regression
first_stage = smf.ols(first_stage_formula, data=df).fit()

# Use the predicted values from the first stage
df['length_conflict_news_pred'] = first_stage.predict(df)

# Define the second-stage formula (corrected)
second_stage_formula_corrected = 'daily_woi ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_corrected = smf.ols(second_stage_formula_corrected, data=df).fit()


# Define the second-stage formula
second_stage_formula_uncorrected = 'daily_woi_nc ~ length_conflict_news_pred + C(month) + C(year) + C(dow)'

# Run the second-stage regression
second_stage_uncorrected = smf.ols(second_stage_formula_uncorrected, data=df).fit()


##### Displaying Table 1:B

In [127]:
stargazer = Stargazer([first_stage, second_stage_corrected, second_stage_uncorrected])

# Customize the output as needed
stargazer.covariate_order(['Intercept', 'high_intensity', 'length_conflict_news_pred'])
stargazer.title("Regression Results")
stargazer.custom_columns(["Length of Conflict News [2SLS 1st stage]", "Corrected News Pressure [2SLS 2nd stage]", "Uncorrected News Pressure [2SLS 2nd stage]"], [1, 1, 1])

stargazer

0,1,2,3
,,,
,,,
,Length of Conflict News [2SLS 1st stage],Corrected News Pressure [2SLS 2nd stage],Uncorrected News Pressure [2SLS 2nd stage]
,(1),(2),(3)
,,,
Intercept,-2.870***,1.141***,1.129***
,(0.494),(0.039),(0.040)
high_intensity,5.291***,,
,(0.247),,
length_conflict_news_pred,,0.000,-0.018***


## Table 2

### Stata code

In [117]:
'''
************************************************************************
** Table 2. Coverage of Conflict, News Pressure, and Google Searches
************************************************************************

use "$dta/replication_file1.dta", clear

eststo: xi: reg any_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , cluster(monthyear)
outreg2 using "$tables/table_2.xls", replace ctitle("Isr-Pal on news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)

eststo: xi: nbreg length_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , vce(cluster monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)

eststo: xi: reg any_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster(monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Isr-Pal on news") keep(lnvic_t_y lnvic_pal_y daily_woi) nocons label bdec(3)

eststo: xi: nbreg length_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), vce(cluster monthyear)
outreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(lnvic_t_y lnvic_pal_y daily_woi) nocons label bdec(3)

xi: newey conflict_searches lnvic_t_y lnvic_pal_y monthyear i.month i.year i.dow  if length_conflict_news_t_t_1!=., lag(7) force
outreg2 using "$tables/table_2.xls", append stats(coef se) keep(lnvic_t_y lnvic_pal_y) nocons label bdec(3)

xi: newey conflict_searches lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1  monthyear i.month i.year i.dow  , lag(7) force
outreg2 using "$tables/table_2.xls", append stats(coef se) keep(lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1) nocons label bdec(3) sdec(3)

* Corresponding OLS regressions estimated below to display R-squared
eststo: xi: reg conflict_searches lnvic_t_y lnvic_pal_y monthyear i.month i.year i.dow if length_conflict_news_t_t_1!=., vce(cluster monthyear)
eststo: xi: reg conflict_searches lnvic_t_y lnvic_pal_y length_conflict_news_t_t_1 monthyear i.month i.year i.dow , vce(cluster monthyear)
esttab, se pr2 r2 star(* 0.1 ** 0.05 *** 0.01)
'''

'\n************************************************************************\n** Table 2. Coverage of Conflict, News Pressure, and Google Searches\n************************************************************************\n\nuse "$dta/replication_file1.dta", clear\n\neststo: xi: reg any_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , cluster(monthyear)\noutreg2 using "$tables/table_2.xls", replace ctitle("Isr-Pal on news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)\n\neststo: xi: nbreg length_conflict_news occurrence_t_y occurrence_pal_t_y i.month i.year i.dow , vce(cluster monthyear)\noutreg2 using "$tables/table_2.xls", append ctitle("Time to Isr-Pal news") keep(occurrence_t_y occurrence_pal_t_y) nocons label bdec(3)\n\neststo: xi: reg any_conflict_news lnvic_t_y lnvic_pal_y daily_woi i.month i.year i.dow if  (occurrence_t_y==1 | occurrence_pal_t_y ==1), cluster(monthyear)\noutreg2 using "$tables/table_2.xls", append ctitle("Isr-Pal on news") ke

### Python code

##### Creating the models

In [135]:
data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()


data = data_table_2.copy()


# First column
# Adding interaction terms for categorical variables like i.month, i.year, and i.dow
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Defining the regression formula
formula = "any_conflict_news ~ occurrence_t_y + occurrence_pal_t_y + C(month) + C(year) + C(dow)"

# Running the regression with clustering on 'monthyear'
model_1 = smf.ols(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['monthyear']})

In [136]:
# Second column
# Specify that the month, year, and dow variables are categorical
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Define the regression formula
formula = "length_conflict_news ~ occurrence_t_y + occurrence_pal_t_y + C(month) + C(year) + C(dow)"

# Fit the negative binomial regression model
model_2 = smf.negativebinomial(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['monthyear']})

  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


         Current function value: 0.758654
         Iterations: 35
         Function evaluations: 41
         Gradient evaluations: 41


In [137]:
# Third column

data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'daily_woi', 'lnvic_pal_y', 'lnvic_t_y', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[(data['occurrence_t_y'] == 1) | (data['occurrence_pal_t_y'] == 1)].copy()

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "any_conflict_news ~ lnvic_t_y + lnvic_pal_y + daily_woi + C(month) + C(year) + C(dow)"

# Fit the linear regression model with clustered standard errors
model_3 = smf.ols(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

In [138]:
data_table_2 = replication_file1.copy()
relevant_columns = ['any_conflict_news', 'daily_woi', 'lnvic_pal_y', 'lnvic_t_y', 'length_conflict_news', 'month', 'year', 'dow', 'monthyear', 'occurrence_t_y', 'occurrence_pal_t_y' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[(data['occurrence_t_y'] == 1) | (data['occurrence_pal_t_y'] == 1)].copy()

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "length_conflict_news ~ lnvic_t_y + lnvic_pal_y + daily_woi + C(month) + C(year) + C(dow)"

# Fit the negative binomial regression model with clustered standard errors
model_4 = smf.negativebinomial(formula, data=filtered_data).fit(cov_type='cluster', cov_kwds={'groups': filtered_data['monthyear']})

  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


         Current function value: 1.011429
         Iterations: 35
         Function evaluations: 64
         Gradient evaluations: 64


In [139]:
data_table_2 = replication_file1.copy()
relevant_columns = ['length_conflict_news_t_t_1', 'conflict_searches', 'lnvic_pal_y', 'lnvic_t_y',  'month', 'year', 'dow', 'monthyear' ]
data_table_2 = data_table_2[relevant_columns].dropna()

data = data_table_2.copy()

# Filter the data for the specified condition
filtered_data = data[data['length_conflict_news_t_t_1'].notnull()]

# Ensure categorical variables for month, year, and day of the week
filtered_data['month'] = filtered_data['month'].astype('category')
filtered_data['year'] = filtered_data['year'].astype('category')
filtered_data['dow'] = filtered_data['dow'].astype('category')

# Define the regression formula
formula = "conflict_searches ~ lnvic_t_y + lnvic_pal_y + monthyear + C(month) + C(year) + C(dow)"

# Fit the linear regression model with HAC standard errors (lag=7)
model_5 = smf.ols(formula, data=filtered_data).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

In [140]:
import statsmodels.formula.api as smf

# Ensure categorical variables for month, year, and day of the week
data['month'] = data['month'].astype('category')
data['year'] = data['year'].astype('category')
data['dow'] = data['dow'].astype('category')

# Define the regression formula
formula = "conflict_searches ~ lnvic_t_y + lnvic_pal_y + length_conflict_news_t_t_1 + monthyear + C(month) + C(year) + C(dow)"

# Fit the linear regression model with HAC standard errors (lag=7)
model_6 = smf.ols(formula, data=data).fit(cov_type='HAC', cov_kwds={'maxlags': 7})

#### Displaying Table 2

In [141]:
# @title
stargazer = Stargazer([model_1, model_2, model_3, model_4, model_5, model_6])

# Customize the output as needed
stargazer.title("Regression Results")
stargazer.covariate_order(['occurrence_t_y', 'occurrence_pal_t_y', 'daily_woi', 'lnvic_t_y', 'lnvic_pal_y', 'length_conflict_news_t_t_1'])
stargazer.custom_columns(["Any news (OLS) - all days - 2000-11", "Length news (bin) - all days - 2000-11", "Any news (OLS) - t/t-1 - 2000-11", "Length news (bin) - t/t-1 - 2000-11", "Searches (OLS) - all days - 2004-11", "Searches (OLS) - all days - 2004-11"], [1, 1, 1, 1 ,1, 1])

stargazer

0,1,2,3,4,5,6
,,,,,,
,,,,,,
,Any news (OLS) - all days - 2000-11,Length news (bin) - all days - 2000-11,Any news (OLS) - t/t-1 - 2000-11,Length news (bin) - t/t-1 - 2000-11,Searches (OLS) - all days - 2004-11,Searches (OLS) - all days - 2004-11
,(1),(2),(3),(4),(5),(6)
,,,,,,
occurrence_t_y,0.100***,0.985***,,,,
,(0.020),(0.183),,,,
occurrence_pal_t_y,0.112***,0.689***,,,,
,(0.032),(0.139),,,,
daily_woi,,,-0.078*,-0.661***,,
