In [None]:
import warnings
import numpy as np
import pandas as pd
import statsmodels.api as sm
import patsy
from statsmodels.tools.sm_exceptions import PerfectSeparationWarning


def stata_logit(formula: str, df: pd.DataFrame):
    """
    Fit a weighted logit GLM, emulating the R helper `stata_logit()` used in DIDmultiplegtSTAT.

    Key choices to be closer to R's `glm(..., family=binomial(link="logit"), weights=...)`:
      - Drop rows with missing values in any variables used by the formula (R's default `na.omit` behavior).
      - Drop rows with missing weights (instead of treating them as 0-weight).
      - Drop rows whose weight is <= 0: zero-weight rows do not affect the likelihood.
        NOTE: negative weights are silently dropped here as well.
      - Use freq_weights (case weights) as closest analogue to R prior/case weights in this context.
      - Fit with maxiter=300 and tol=1e-8. R's `glm.control` defaults are maxit=25 and
        epsilon=1e-8, so only the tolerance is an R default; the larger iteration cap
        presumably mirrors the R helper (TODO confirm against the R source).

    Parameters
    ----------
    formula : str
        Patsy formula describing the outcome and regressors.
    df : pandas.DataFrame
        Data the formula is evaluated against. If it has a column named
        ``weight_XX`` that column supplies the weights (coerced to float,
        non-numeric entries become missing); otherwise every row gets weight 1.

    Returns
    -------
    The object returned by ``sm.GLM(...).fit(...)``. The design matrices keep the
    original index labels of the rows of `df` that were actually used.

    Raises
    ------
    ValueError
        If `df` is None or empty, if no row has a strictly positive weight,
        or if no row is left after dropping NA in the formula variables.
    """

    if df is None or len(df) == 0:
        raise ValueError("stata_logit: df is empty.")

    # --- Weights (match R behavior: NA weights -> row dropped) ---
    if "weight_XX" in df.columns:
        w = pd.to_numeric(df["weight_XX"], errors="coerce").astype(float)
    else:
        w = pd.Series(1.0, index=df.index, dtype=float)

    # Keep only strictly positive, non-missing weights. A plain boolean array is
    # used so that the selection is positional and never relies on index labels.
    keep_w = (w.notna() & (w > 0)).to_numpy()

    # If nothing survives the weight filter, fail fast with a clear message.
    if not keep_w.any():
        raise ValueError("stata_logit: no rows left after dropping missing/zero weights.")

    # Remember the original labels, then work on a 0..n-1 index. Aligning the
    # weights by label breaks when `df` has duplicate index labels (label lookup
    # returns every match, so the weight vector grows longer than the design
    # matrix); positional alignment is always one-to-one.
    kept_labels = df.index[keep_w]
    df2 = df.loc[keep_w].reset_index(drop=True)
    w2 = w.to_numpy()[keep_w]

    # --- Build design matrices with Patsy and DROP missing (match glm's na.omit) ---
    # Patsy drops any row with NA in variables referenced in the formula; the
    # index of the returned frames tells us which positions of df2 survived.
    y, X = patsy.dmatrices(formula, df2, return_type="dataframe", NA_action="drop")

    if X.shape[0] == 0:
        raise ValueError("stata_logit: no rows left after dropping NA in formula variables.")

    # Align weights to the design-matrix rows by position, then put the original
    # labels back so the fitted results are indexed like the caller's data.
    used_pos = X.index.to_numpy()
    wX = w2[used_pos]
    used_labels = kept_labels[used_pos]
    y.index = used_labels
    X.index = used_labels

    # --- Fit GLM logit ---
    with warnings.catch_warnings():
        # Blanket ignore keeps output quiet like the R helper; it also covers
        # perfect-separation and convergence warnings.
        warnings.simplefilter("ignore")
        model = sm.GLM(
            y,
            X,
            # `links.Logit` is the link class; the lowercase `links.logit`
            # alias is deprecated in statsmodels.
            family=sm.families.Binomial(link=sm.families.links.Logit()),
            freq_weights=wX,
        )
        res = model.fit(maxiter=300, tol=1e-8, disp=0)

    return res
