In [20]:
# ============================================
# NYC 24/7 CAMERAS — OLS WITH/WITHOUT INTERACTION (NO MONTH FE)
# WINDOW: ±6 MONTHS AROUND 2022-08-01
# OUTPUTS: CSV + TeX (tabular-only + with caption)
# ============================================

import numpy as np
import pandas as pd
from pathlib import Path
import statsmodels.formula.api as smf
from scipy import stats as scistats

# paths — absolute, machine-specific locations; the report folder is created
# up front so the exports at the end of the cell cannot fail on a missing dir
DATA_PATH  = "/Users/eamon/Desktop/University/UofT 2025-26/Fall/Applied Machine Learning/Research Project/Original Paper/Data/Motor_Vehicle_Collisions_-_Crashes_20250917.csv"
REPORT_DIR = "/Users/eamon/Desktop/University/UofT 2025-26/Fall/Applied Machine Learning/Week 3/Report"
Path(REPORT_DIR).mkdir(parents=True, exist_ok=True)

# policy window: six calendar months on either side of the policy date
POLICY_DATE = pd.Timestamp("2022-08-01")
START_DATE, END_DATE = (POLICY_DATE + pd.DateOffset(months=m) for m in (-6, 6))
VALID_BOROS = {
    "BRONX",
    "BROOKLYN",
    "MANHATTAN",
    "QUEENS",
    "STATEN ISLAND",
}

# load only the columns the analysis touches
use_cols = [
    "CRASH DATE", "CRASH TIME", "BOROUGH",
    "NUMBER OF PERSONS INJURED",
    *(f"VEHICLE TYPE CODE {i}" for i in range(1, 6)),
]
df = pd.read_csv(DATA_PATH, usecols=use_cols, low_memory=False)

# parse date/time; values that fail to parse become NaT/NaN and the row is dropped
crash_day = pd.to_datetime(df["CRASH DATE"], errors="coerce")
crash_clock = pd.to_datetime(df["CRASH TIME"], format="%H:%M", errors="coerce")
df = df.assign(date=crash_day, hour=crash_clock.dt.hour)
df = df[df["date"].notna() & df["hour"].notna()].copy()
df["hour"] = df["hour"].astype(int)

# window filter — both endpoints are inclusive
# NOTE(review): END_DATE itself is kept, so the post period runs one day past
# six months while the pre period stops the day before POLICY_DATE — confirm
# the closed upper bound is intended.
df = df[df["date"].between(START_DATE, END_DATE)].copy()

# borough filter — normalise case/whitespace first; missing boroughs turn into
# the string "NAN" under astype(str) and are removed by the membership test
boro = df["BOROUGH"].astype(str).str.upper().str.strip()
df["BOROUGH"] = boro
df = df[boro.isin(VALID_BOROS)].copy()
df["borough"] = df["BOROUGH"]

# DiD indicators
# after = crash on/after the policy date; night = 22:00–05:59
# NOTE(review): presumably night marks the hours newly covered by the policy;
# if other periods (e.g. weekends) were also newly covered they are coded
# night=0 here — confirm against the policy definition.
is_daytime = (df["hour"] >= 6) & (df["hour"] < 22)
df["after"]       = (df["date"] >= POLICY_DATE).astype(int)
df["night"]       = (~is_daytime).astype(int)
df["after_night"] = df["after"].mul(df["night"])

# weekend dummy (dayofweek: Monday=0 ... Saturday=5, Sunday=6)
df["weekend"] = df["date"].dt.dayofweek.isin([5, 6]).astype(int)

# vehicle map
# Ordered (category, keywords) rules. The first rule with a keyword contained
# in the normalised string wins, so narrower rules must come before broader
# ones whose keywords they contain:
#   * LIGHT_TRUCK_VAN ("PICK") before TRUCK, so pick-up trucks stay light trucks
#   * EMERGENCY ("FIRE") before TRUCK, so "FIRE TRUCK" is not absorbed by TRUCK
#   * MICROMOBILITY ("E-BIKE", "E-SCOOT") before MOTORCYCLE ("SCOOTER") and
#     BICYCLE ("BIKE"), otherwise the rule can never fire for those strings
#   * MOTORCYCLE ("MOTORBIKE") before BICYCLE ("BIKE")
_VEHICLE_RULES = (
    ("CAR", ("SEDAN", "PASSENGER", "CONVERTIBLE", "3-DOOR")),
    ("SUV_WAGON", ("SPORT UTILITY", "STATION WAGON")),
    ("TAXI", ("TAXI", "LIVERY")),
    ("LIGHT_TRUCK_VAN", ("PICK", "VAN", "CARRY ALL")),
    ("EMERGENCY", ("AMBULANCE", "FIRE")),
    ("TRUCK", (
        "TRUCK", "TRACTOR", "DUMP", "FLAT", "WRECKER", "GARBAGE", "CONCRETE",
        "ARMORED", "BEVERAGE", "CHASSIS", "TANKER", "LARGE COM", "SMALL COM",
    )),
    ("BUS", ("BUS",)),
    ("MICROMOBILITY", ("E-BIKE", "EBIKE", "E-SCOOT")),
    ("MOTORCYCLE", ("MOTORCYCLE", "MOTORBIKE", "SCOOTER", "MOPED")),
    ("BICYCLE", ("BIKE", "BICYCLE")),
)


def map_vehicle(s):
    """Collapse a free-text vehicle type into one coarse category label.

    The value is upper-cased and stripped, then matched by substring against
    ``_VEHICLE_RULES`` in order; the first matching rule decides the label.

    Parameters
    ----------
    s : scalar
        Raw vehicle type value; anything ``pd.isna`` treats as missing is
        accepted.

    Returns
    -------
    str
        One of the category names in ``_VEHICLE_RULES``, or ``"OTHER"`` when
        the value is missing or no keyword matches.

    Notes
    -----
    Matching is by plain substring, so a keyword also matches inside longer
    words. Compared with the previous rule order, e-bikes and e-scooters now
    map to MICROMOBILITY, "MOTORBIKE" maps to MOTORCYCLE, and strings naming
    both an emergency keyword and a truck keyword map to EMERGENCY.
    """
    if pd.isna(s):
        return "OTHER"
    text = str(s).upper().strip()
    for category, keywords in _VEHICLE_RULES:
        if any(k in text for k in keywords):
            return category
    return "OTHER"

# vehicle category: take the first non-missing of the five vehicle-type
# columns (back-fill across columns, then keep the left-most) and bucket it
veh_cols = [f"VEHICLE TYPE CODE {i}" for i in range(1, 6)]
veh_first = df[veh_cols].bfill(axis=1).iloc[:, 0]
df["veh_cat"] = veh_first.map(map_vehicle)

# outcome: persons injured per crash; rows with a non-numeric count are dropped
df["injured"] = pd.to_numeric(df["NUMBER OF PERSONS INJURED"], errors="coerce")
df = df[df["injured"].notna()].copy()

# Both models cluster standard errors by borough.
# NOTE(review): VALID_BOROS allows at most five clusters; cluster-robust
# standard errors are generally unreliable with so few groups — confirm.
# NOTE(review): neither formula contains after, night AND after_night
# together, so after_night in model A is not a conventional DiD interaction
# net of main effects — confirm this decomposition is intended.

# OLS A: interaction only
model_A = smf.ols(
    "injured ~ after_night + weekend + C(veh_cat) + C(borough)",
    data=df,
)
fit_A = model_A.fit(cov_type="cluster", cov_kwds={"groups": df["borough"]})

# OLS B: separate after & night
model_B = smf.ols(
    "injured ~ after + night + weekend + C(veh_cat) + C(borough)",
    data=df,
)
fit_B = model_B.fit(cov_type="cluster", cov_kwds={"groups": df["borough"]})

# helpers
def pull_beta_se(fit, name):
    """Look up one term's coefficient and standard error on a fitted model.

    Reads ``fit.params`` and ``fit.bse`` with ``.get``, so a term that is not
    present yields ``np.nan`` instead of raising.

    Returns
    -------
    tuple
        ``(coefficient, standard_error)`` for *name*.
    """
    coef = fit.params.get(name, np.nan)
    std_err = fit.bse.get(name, np.nan)
    return coef, std_err

def add_row(rows, label, beta, se):
    """Append one ``(label, beta, se, t, p)`` tuple to *rows* in place.

    Parameters
    ----------
    rows : list
        Accumulator that receives the new tuple.
    label : str
        Row label.
    beta, se : float
        Coefficient and its standard error.

    Notes
    -----
    ``t`` is ``beta / se``, or NaN when either input is missing or ``se`` is
    zero. ``p`` is the two-sided p-value under a standard normal reference
    distribution, or NaN when ``t`` is NaN. Nothing is returned.
    """
    usable = pd.notna(beta) and pd.notna(se) and se != 0
    t = beta / se if usable else np.nan
    # sf(x) equals 1 - cdf(x) mathematically, but computing the upper tail
    # directly avoids the cancellation that rounds p to exactly 0.0 for
    # large |t|.
    p = 2 * scistats.norm.sf(abs(t)) if pd.notna(t) else np.nan
    rows.append((label, beta, se, t, p))

# collect estimates
# Intercept and the interaction are read from model A; the separate
# after/night terms are read from model B, so the exported table combines
# coefficients from two different fits.
b_int,  se_int  = pull_beta_se(fit_A, "Intercept")
b_axn,  se_axn  = pull_beta_se(fit_A, "after_night")
b_after,se_after= pull_beta_se(fit_B, "after")
b_night,se_night= pull_beta_se(fit_B, "night")

# vehicle-category terms to report (parameter name in model A, display label);
# a term missing from the fit comes back as NaN and is rendered as a blank cell
veh_keys = [
    ("C(veh_cat)[T.EMERGENCY]", "Vehicle: Emergency"),
    ("C(veh_cat)[T.BUS]",       "Vehicle: Bus"),
    ("C(veh_cat)[T.CAR]",       "Vehicle: Car"),
    ("C(veh_cat)[T.TRUCK]",     "Vehicle: Truck"),
]

# build table rows
rows = []
add_row(rows, "Intercept",      b_int,  se_int)
add_row(rows, "After × Night",  b_axn,  se_axn)
add_row(rows, r"\hspace{1em}After", b_after, se_after)
add_row(rows, r"\hspace{1em}Night", b_night, se_night)
for key, lab in veh_keys:
    b, se = pull_beta_se(fit_A, key)
    add_row(rows, lab, b, se)

tab = pd.DataFrame(rows, columns=["Variable","Coefficient","Std. Error","t-Statistic","p-Value"])

# export CSV (unformatted numbers; the Variable column keeps the raw LaTeX
# indentation markup used for the TeX tables)
csv_path = Path(REPORT_DIR) / "did_ols_interaction_plus_decomp_NO_MONTH.csv"
tab.to_csv(csv_path, index=False)

# format: four decimals, missing values as empty strings
tab_fmt = tab.copy()
for c in ["Coefficient","Std. Error","t-Statistic","p-Value"]:
    tab_fmt[c] = tab_fmt[c].map(lambda x: "" if pd.isna(x) else f"{x:.4f}")

# The "After × Night" label is non-ASCII, so every text file below is written
# with an explicit UTF-8 encoding instead of the platform default.

# export TeX — TABULAR ONLY (use this one with \input inside your own table float)
# escape=False keeps the \hspace markup in the labels as live LaTeX
tex_tabular = Path(REPORT_DIR) / "did_ols_interaction_plus_decomp_NO_MONTH_tabular.tex"
with open(tex_tabular, "w", encoding="utf-8") as f:
    f.write(tab_fmt.to_latex(index=False, escape=False))

# optional: export TeX with caption/label (standalone)
tex_withcap = Path(REPORT_DIR) / "did_ols_interaction_plus_decomp_NO_MONTH_withcaption.tex"
with open(tex_withcap, "w", encoding="utf-8") as f:
    f.write(tab_fmt.to_latex(
        index=False, escape=False,
        caption="OLS with Interaction and Decomposition (No Month FE, + Weekend)",
        label="tab:did_interaction_decomp_no_month"
    ))

# also save full summaries (optional)
(Path(REPORT_DIR)/"did_ols_A_full_NO_MONTH.txt").write_text(fit_A.summary().as_text(), encoding="utf-8")
(Path(REPORT_DIR)/"did_ols_B_full_NO_MONTH.txt").write_text(fit_B.summary().as_text(), encoding="utf-8")

print(f"[saved] {csv_path}")
print(f"[saved] {tex_tabular}")
print(f"[saved] {tex_withcap}")












[saved] /Users/eamon/Desktop/University/UofT 2025-26/Fall/Applied Machine Learning/Week 3/Report/did_ols_interaction_plus_decomp_NO_MONTH.csv
[saved] /Users/eamon/Desktop/University/UofT 2025-26/Fall/Applied Machine Learning/Week 3/Report/did_ols_interaction_plus_decomp_NO_MONTH_tabular.tex
[saved] /Users/eamon/Desktop/University/UofT 2025-26/Fall/Applied Machine Learning/Week 3/Report/did_ols_interaction_plus_decomp_NO_MONTH_withcaption.tex
