In [14]:
# ============================================================
# Construct major political events dataset (2000–2011)
# Durante & Zhuravskaya (JPE) — replication
# Output written DIRECTLY in the Git repo root
# ============================================================

import os
import pandas as pd

# ------------------------------------------------------------
# 0. ABSOLUTE PATH TO YOUR GIT REPO (IMPORTANT)
# ------------------------------------------------------------
# Directory the output CSV is written into. The default is the original
# author's local checkout; set the ISRAEL_PALESTINE_REPO_DIR environment
# variable to point at your own clone instead of editing this file.
# An unset or empty variable falls back to the default.
REPO_DIR = (
    os.environ.get("ISRAEL_PALESTINE_REPO_DIR")
    or "/Users/raniabenhamidane/Desktop/Israel-Palestine"
)

# ------------------------------------------------------------
# 1. Event centers (one row per central event)
# ------------------------------------------------------------
# Central dates of the major U.S. political events, one row per event.
# Columns:
#   event_date - calendar day of the event (ISO string, parsed below)
#   event_type - category key, used later to pick the window half-width
#   label      - human-readable name of the event
events = pd.DataFrame([
    # ---- 2000 presidential cycle ----
    {"event_date": "2000-01-24", "event_type": "caucus", "label": "Iowa caucuses"},
    {"event_date": "2000-02-01", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2000-03-07", "event_type": "super_tuesday", "label": "Super Tuesday"},
    {"event_date": "2000-11-07", "event_type": "general_election", "label": "General election (nationwide)"},

    # ---- inaugurations ----
    {"event_date": "2001-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2005-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2009-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},

    # ---- nationwide general elections ----
    {"event_date": "2002-11-05", "event_type": "general_election", "label": "General election (nationwide)"},
    {"event_date": "2004-11-02", "event_type": "general_election", "label": "General election (nationwide)"},
    {"event_date": "2006-11-07", "event_type": "general_election", "label": "General election (nationwide)"},
    {"event_date": "2008-11-04", "event_type": "general_election", "label": "General election (nationwide)"},
    {"event_date": "2010-11-02", "event_type": "general_election", "label": "General election (nationwide)"},

    # ---- 2004 presidential cycle ----
    {"event_date": "2004-01-19", "event_type": "caucus", "label": "Iowa caucuses"},
    {"event_date": "2004-01-27", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2004-03-02", "event_type": "super_tuesday", "label": "Super Tuesday"},

    # ---- 2008 presidential cycle ----
    {"event_date": "2008-01-03", "event_type": "caucus", "label": "Iowa caucuses"},
    # The 2008 New Hampshire primary was held on 8 January 2008
    # (this row was previously entered as 2008-01-22).
    {"event_date": "2008-01-08", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2008-02-05", "event_type": "super_tuesday", "label": "Super Tuesday"},
])

# Parse with an explicit format so a mistyped date raises instead of being
# guessed at, then order the table chronologically (ties broken by type).
events["event_date"] = pd.to_datetime(events["event_date"], format="%Y-%m-%d")
events = events.sort_values(["event_date", "event_type"]).reset_index(drop=True)

# ------------------------------------------------------------
# 2. Time windows (lags/leads) — as in the paper
# ------------------------------------------------------------
# WINDOW maps an event type to its half-width k (in days); an event centred
# on day t then covers t-k ... t+k:
#   k = 2 -> "5 days around" the event
#   k = 1 -> "3 days around" the event
#   k = 0 -> the day of the event only
WINDOW = {
    **dict.fromkeys(("general_election",), 2),
    **dict.fromkeys(("nh_primary", "super_tuesday"), 1),
    **dict.fromkeys(("caucus", "inauguration"), 0),
}

# ------------------------------------------------------------
# 3. Expand to daily observations
# ------------------------------------------------------------
# One record per (event, day offset): each event contributes every calendar
# day from center - k to center + k, where k = WINDOW[event_type].
rows = [
    {
        "date": center + pd.Timedelta(days=offset),
        "major_political_event": 1,
        "event_type": kind,
        "event_center": center,
    }
    for center, kind in zip(events["event_date"], events["event_type"])
    for offset in range(-WINDOW[kind], WINDOW[kind] + 1)
]

expanded = pd.DataFrame(rows)

# ------------------------------------------------------------
# 4. Final daily dataset used in regressions
# ------------------------------------------------------------
def _join_sorted_unique(values):
    """Return the distinct items of *values*, sorted and comma-joined."""
    return ",".join(sorted(set(values)))


# Collapse to one row per calendar date: the indicator is the max over all
# windows covering that date, and the contributing event types and event
# centre dates are kept as comma-separated strings.
grouped = expanded.groupby("date")
daily = grouped.agg(
    major_political_event=pd.NamedAgg(column="major_political_event", aggfunc="max"),
    which_events=pd.NamedAgg(column="event_type", aggfunc=_join_sorted_unique),
    event_centers=pd.NamedAgg(
        column="event_center",
        aggfunc=lambda s: _join_sorted_unique(s.dt.strftime("%Y-%m-%d")),
    ),
)
daily = daily.reset_index().sort_values("date")

# ------------------------------------------------------------
# 5. Write CSV DIRECTLY in the repo root
# ------------------------------------------------------------
# Save the per-date table (without the index column) under REPO_DIR, then
# print a short summary of what was written.
csv_name = "major_political_events_2000_2011.csv"
out_path = os.path.join(REPO_DIR, csv_name)
daily.to_csv(out_path, index=False)

first_day = daily["date"].min()
last_day = daily["date"].max()

print("File successfully written to:", out_path, sep="\n")
print("Date range:", first_day, "to", last_day)
print("Number of event-days:", daily.shape[0])

# Last expression of the cell, so a notebook displays the first rows.
daily.head()

File successfully written to:
/Users/raniabenhamidane/Desktop/Israel-Palestine/major_political_events_2000_2011.csv
Date range: 2000-01-24 00:00:00 to 2010-11-04 00:00:00
Number of event-days: 54


Unnamed: 0,date,major_political_event,which_events,event_centers
0,2000-01-24,1,caucus,2000-01-24
1,2000-01-31,1,nh_primary,2000-02-01
2,2000-02-01,1,nh_primary,2000-02-01
3,2000-02-02,1,nh_primary,2000-02-01
4,2000-03-06,1,super_tuesday,2000-03-07
