In [10]:
# ============================================================
# Construct major political events dataset (2000–2011)
# Durante & Zhuravskaya (JPE) — replication
# Output written DIRECTLY in the Git repo root
# ============================================================

import os
import pandas as pd

# ------------------------------------------------------------
# 0. ABSOLUTE PATH TO YOUR GIT REPO (IMPORTANT)
# ------------------------------------------------------------
REPO_DIR = "/Users/raniabenhamidane/Desktop/Israel-Palestine"

# ------------------------------------------------------------
# 1. Event centers (one row per central event)
# ------------------------------------------------------------

events = pd.DataFrame([
    # ---- 2000 presidential cycle ----
    {"event_date": "2000-01-24", "event_type": "caucus", "label": "Iowa caucuses"},
    {"event_date": "2000-02-01", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2000-03-07", "event_type": "super_tuesday", "label": "Super Tuesday"},
    {"event_date": "2000-11-07", "event_type": "general_election", "label": "General election day"},

    # ---- inaugurations ----
    {"event_date": "2001-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2005-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2009-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},

    # ---- nationwide-ish GENERAL ELECTION DAY (incl. odd years from your list) ----
    {"event_date": "2001-11-06", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2002-11-05", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2003-11-04", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2004-11-02", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2005-11-08", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2006-11-07", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2007-11-06", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2008-11-04", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2009-11-03", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2010-11-02", "event_type": "general_election", "label": "General election day"},
    {"event_date": "2011-11-08", "event_type": "general_election", "label": "General election day"},

    # ---- 2004 presidential cycle ----
    {"event_date": "2004-01-19", "event_type": "caucus", "label": "Iowa caucuses"},
    {"event_date": "2004-01-27", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2004-03-02", "event_type": "super_tuesday", "label": "Super Tuesday"},

    # ---- 2008 presidential cycle ----
    {"event_date": "2008-01-03", "event_type": "caucus", "label": "Iowa caucuses"},
    {"event_date": "2008-01-22", "event_type": "nh_primary", "label": "New Hampshire primary"},
    {"event_date": "2008-02-05", "event_type": "super_tuesday", "label": "Super Tuesday"},
])

events["event_date"] = pd.to_datetime(events["event_date"])
events = events.sort_values(["event_date", "event_type"]).reset_index(drop=True)

# ------------------------------------------------------------
# 2. Time windows (lags/leads) — as in the paper
# ------------------------------------------------------------
# Interpretation:
# - "5 days around" -> t-2 ... t+2
# - "3 days around" -> t-1 ... t+1
# - day-of only     -> t

WINDOW = {
    "general_election": 2,
    "nh_primary": 1,
    "super_tuesday": 1,
    "caucus": 0,
    "inauguration": 0,
}

# ------------------------------------------------------------
# 3. Expand to daily observations
# ------------------------------------------------------------
rows = []
for _, r in events.iterrows():
    k = WINDOW[r["event_type"]]
    for d in range(-k, k + 1):
        rows.append({
            "date": r["event_date"] + pd.Timedelta(days=d),
            "major_political_event": 1,
            "event_type": r["event_type"],
            "event_center": r["event_date"],
        })

expanded = pd.DataFrame(rows)

# ------------------------------------------------------------
# 4. Final daily dataset used in regressions
# ------------------------------------------------------------
daily = (
    expanded
    .groupby("date")
    .agg(
        major_political_event=("major_political_event", "max"),
        which_events=("event_type", lambda s: ",".join(sorted(set(s)))),
        event_centers=("event_center", lambda s: ",".join(sorted(set(s.dt.strftime("%Y-%m-%d")))))
    )
    .reset_index()
    .sort_values("date")
)

# ------------------------------------------------------------
# 5. Write CSV DIRECTLY in the repo root
# ------------------------------------------------------------
out_path = os.path.join(REPO_DIR, "major_political_events_2000_2011.csv")
daily.to_csv(out_path, index=False)

print("File successfully written to:")
print(out_path)
print("Date range:", daily["date"].min(), "to", daily["date"].max())
print("Number of event-days:", len(daily))

daily.head()

File successfully written to:
/Users/raniabenhamidane/Desktop/Israel-Palestine/major_political_events_2000_2011.csv
Date range: 2000-01-24 00:00:00 to 2011-11-10 00:00:00
Number of event-days: 84


Unnamed: 0,date,major_political_event,which_events,event_centers
0,2000-01-24,1,caucus,2000-01-24
1,2000-01-31,1,nh_primary,2000-02-01
2,2000-02-01,1,nh_primary,2000-02-01
3,2000-02-02,1,nh_primary,2000-02-01
4,2000-03-06,1,super_tuesday,2000-03-07


In [11]:
import pandas as pd

# ============================================================
# Major political events dataset (2012–2019)
# Same structure as your 2000–2011 file:
# columns: date, major_political_event, which_events, event_centers, labels
# ============================================================

events = [
    # ---- 2012 presidential cycle ----
    {"event_date": "2012-01-03", "event_type": "caucus", "label": "Iowa presidential caucuses"},
    {"event_date": "2012-01-10", "event_type": "nh_primary", "label": "New Hampshire presidential primary"},
    {"event_date": "2012-03-06", "event_type": "super_tuesday", "label": "Super Tuesday"},
    {"event_date": "2012-11-06", "event_type": "general_election", "label": "General election day (nationwide)"},

    # ---- 2013 ----
    {"event_date": "2013-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2013-11-05", "event_type": "general_election", "label": "General election day (major state/local elections)"},

    # ---- 2014 midterms ----
    {"event_date": "2014-11-04", "event_type": "general_election", "label": "General election day (nationwide)"},

    # ---- 2015 ----
    {"event_date": "2015-11-03", "event_type": "general_election", "label": "General election day (major state/local elections)"},

    # ---- 2016 presidential cycle ----
    # NOTE: your list has an odd Iowa date (Jan 18, 2016) and also Feb 1, 2016.
    # The actual Iowa caucuses were Feb 1, 2016, so we keep Feb 1.
    {"event_date": "2016-02-01", "event_type": "caucus", "label": "Iowa presidential caucuses"},
    {"event_date": "2016-02-09", "event_type": "nh_primary", "label": "New Hampshire presidential primary"},
    {"event_date": "2016-03-01", "event_type": "super_tuesday", "label": "Super Tuesday"},
    {"event_date": "2016-11-08", "event_type": "general_election", "label": "General election day (nationwide)"},

    # ---- 2017 ----
    {"event_date": "2017-01-20", "event_type": "inauguration", "label": "Presidential inauguration"},
    {"event_date": "2017-11-07", "event_type": "general_election", "label": "General election day (major state/local elections)"},

    # ---- 2018 midterms ----
    {"event_date": "2018-11-06", "event_type": "general_election", "label": "General election day (nationwide)"},

    # ---- 2019 ----
    {"event_date": "2019-11-05", "event_type": "general_election", "label": "General election day (KY/MS etc.)"},
]

df_events = pd.DataFrame(events)
df_events["event_date"] = pd.to_datetime(df_events["event_date"])
df_events = df_events.sort_values(["event_date", "event_type"]).reset_index(drop=True)

# Windows (same as before)
WINDOW = {
    "general_election": 2,  # 5 days around: t-2..t+2
    "nh_primary": 1,        # 3 days around: t-1..t+1
    "super_tuesday": 1,     # 3 days around: t-1..t+1
    "caucus": 0,            # day-of
    "inauguration": 0,      # day-of
}

# Expand
rows = []
for _, r in df_events.iterrows():
    k = WINDOW.get(r["event_type"], 0)
    for d in range(-k, k + 1):
        rows.append({
            "date": r["event_date"] + pd.Timedelta(days=d),
            "major_political_event": 1,
            "event_type": r["event_type"],
            "event_center": r["event_date"],
            "label": r["label"],
        })

expanded = pd.DataFrame(rows)

# Collapse to daily
daily_1219 = (
    expanded
    .groupby("date", as_index=False)
    .agg(
        major_political_event=("major_political_event", "max"),
        which_events=("event_type", lambda s: ",".join(sorted(set(s)))),
        event_centers=("event_center", lambda s: ",".join(sorted(set(s.dt.strftime("%Y-%m-%d"))))),
        labels=("label", lambda s: " | ".join(sorted(set(s))))
    )
    .sort_values("date")
)

# Keep only 2012–2019 inclusive
daily_1219 = daily_1219[(daily_1219["date"] >= "2012-01-01") & (daily_1219["date"] <= "2019-12-31")].copy()

# Save in repo root
out_path = "major_political_events_2012_2019.csv"
daily_1219.to_csv(out_path, index=False)

print("Saved:", out_path)
print("Rows:", daily.shape[0])
print("Unique which_events tokens:", sorted({t for x in daily_1219["which_events"] for t in x.split(",")}))
print("Date range:", daily_1219["date"].min(), "->", daily_1219["date"].max())

daily_1219.head()

Saved: major_political_events_2012_2019.csv
Rows: 84
Unique which_events tokens: ['caucus', 'general_election', 'inauguration', 'nh_primary', 'super_tuesday']
Date range: 2012-01-03 00:00:00 -> 2019-11-07 00:00:00


Unnamed: 0,date,major_political_event,which_events,event_centers,labels
0,2012-01-03,1,caucus,2012-01-03,Iowa presidential caucuses
1,2012-01-09,1,nh_primary,2012-01-10,New Hampshire presidential primary
2,2012-01-10,1,nh_primary,2012-01-10,New Hampshire presidential primary
3,2012-01-11,1,nh_primary,2012-01-10,New Hampshire presidential primary
4,2012-03-05,1,super_tuesday,2012-03-06,Super Tuesday


In [12]:
#combine from 2000-2019

# ============================================================
# (C) Combine -> 2000–2019
# ============================================================

combined = pd.concat([daily, daily_1219], ignore_index=True).sort_values("date").reset_index(drop=True)

# sanity checks
dup_dates = combined["date"].duplicated().sum()
print("Duplicate dates:", dup_dates)  # should be 0

# drop labels if exists and useless
#if "labels" in combined.columns:
    #if combined["labels"].isna().all():
combined = combined.drop(columns=["labels"])


print("Combined rows:", combined.shape[0])
print("Range:", combined["date"].min(), "->", combined["date"].max())
print("which_events tokens:", sorted({t for x in combined["which_events"] for t in str(x).split(",") if t}))

# ============================================================
# (D) Save in repo root
# ============================================================

out_path = os.path.join(REPO_DIR, "major_political_events_2000_2019.csv")
combined.to_csv(out_path, index=False)

print("Saved:", out_path)
combined.head()

Duplicate dates: 0
Combined rows: 140
Range: 2000-01-24 00:00:00 -> 2019-11-07 00:00:00
which_events tokens: ['caucus', 'general_election', 'inauguration', 'nh_primary', 'super_tuesday']
Saved: /Users/raniabenhamidane/Desktop/Israel-Palestine/major_political_events_2000_2019.csv


Unnamed: 0,date,major_political_event,which_events,event_centers
0,2000-01-24,1,caucus,2000-01-24
1,2000-01-31,1,nh_primary,2000-02-01
2,2000-02-01,1,nh_primary,2000-02-01
3,2000-02-02,1,nh_primary,2000-02-01
4,2000-03-06,1,super_tuesday,2000-03-07


In [13]:
#build major_event dummy 
pol = pd.read_csv("data_clean/major_political_events_2000_2019.csv")
fifa = pd.read_csv("data_clean/fifa_major_events_2000_2019.csv")

# ----------------------------
# 3) Build daily indicators (unique dates)
# ----------------------------
# political already has major_political_event=1 (but keep safe)
pol_daily = (
    pol[["date"]].drop_duplicates()
    .assign(major_political_event=1)
)

# fifa daily dummy (one row per date)
fifa_daily = (
    fifa[["date"]].drop_duplicates()
    .assign(fifa_event=1)
)

# ----------------------------
# 4) Combine -> major_event dummy
# ----------------------------
major = pd.merge(pol_daily, fifa_daily, on="date", how="outer")

major["major_political_event"] = major["major_political_event"].fillna(0).astype(int)
major["fifa_event"] = major["fifa_event"].fillna(0).astype(int)

major["major_event"] = ((major["major_political_event"] == 1) | (major["fifa_event"] == 1)).astype(int)

major = major.sort_values("date").reset_index(drop=True)

print("Unique political days:", major["major_political_event"].sum())
print("Unique fifa days:", major["fifa_event"].sum())
print("Unique major_event days (union):", major["major_event"].sum())

major.head()

# ----------------------------
# 6) Save final dummy dataset
# ----------------------------
major.to_csv("major_events_dummy_2000_2019.csv", index=False)
print("Saved: major_events_dummy_2000_2019.csv")




Unique political days: 140
Unique fifa days: 15
Unique major_event days (union): 155
Saved: major_events_dummy_2000_2019.csv


In [7]:
import pandas as pd

# ============================================================
# Build MAJOR EVENT dummy (daily calendar 2000-2019)
# Output: one row per day, major_event = 1 if political OR fifa
# ============================================================

# 1) Read datasets
pol = pd.read_csv("data_clean/major_political_events_2000_2019.csv")
fifa = pd.read_csv("data_clean/fifa_major_events_2000_2019.csv")

# 2) Ensure dates are datetime
pol["date"] = pd.to_datetime(pol["date"])
fifa["date"] = pd.to_datetime(fifa["date"])

# 3) Create FULL daily calendar
calendar = pd.DataFrame({
    "date": pd.date_range("2000-01-01", "2019-12-31", freq="D")
})

# 4) Build daily event indicators (unique dates)
pol_daily = (
    pol[["date"]].drop_duplicates()
    .assign(major_political_event=1)
)

fifa_daily = (
    fifa[["date"]].drop_duplicates()
    .assign(fifa_event=1)
)

# 5) Merge onto full calendar (so missing days become 0)
major = calendar.merge(pol_daily, on="date", how="left")
major = major.merge(fifa_daily, on="date", how="left")

major["major_political_event"] = major["major_political_event"].fillna(0).astype(int)
major["fifa_event"] = major["fifa_event"].fillna(0).astype(int)



# Union dummy
major["major_event"] = ((major["major_political_event"] == 1) | (major["fifa_event"] == 1)).astype(int)

# 6) Sort + sanity checks
major = major.sort_values("date").reset_index(drop=True)

print("Date range:", major["date"].min(), "->", major["date"].max())
print("Total days:", major.shape[0])
print("Political event-days:", major["major_political_event"].sum())
print("FIFA event-days:", major["fifa_event"].sum())
print("Major event-days (union):", major["major_event"].sum())

#rename variables
major = major.rename(columns={
    "date": "Date",
    "major_event": "lead_maj_events"
})


# 7) Save
major.to_csv("data_clean/major_events_dummy_2000_2019.csv", index=False)
print("Saved: data_clean/major_events_dummy_2000_2019.csv")

major.head()


Date range: 2000-01-01 00:00:00 -> 2019-12-31 00:00:00
Total days: 7305
Political event-days: 140
FIFA event-days: 15
Major event-days (union): 155
Saved: data_clean/major_events_dummy_2000_2019.csv


Unnamed: 0,Date,major_political_event,fifa_event,lead_maj_events
0,2000-01-01,0,0,0
1,2000-01-02,0,0,0
2,2000-01-03,0,0,0
3,2000-01-04,0,0,0
4,2000-01-05,0,0,0
