In [None]:
# Reload imported modules automatically before each cell executes, so edits to the
# project package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
"""Debugging for analytical bootstrap delta method."""


from collections.abc import Callable
from functools import partial

import numpy as np
import pandas as pd

from thesis.classes import Instrument, LocalATEs
from thesis.config import RNG
from thesis.simple_model.funcs import (
    _draw_bootstrap_data,
    _draw_data,
    _estimate_pscores,
    _idset,
    _late,
    _late_2sls,
    simulation_bootstrap,
)

In [None]:
# Treatment-effect settings used to simulate data: the complier effect is the knob,
# the never-taker effect is fixed at 0.
late_complier = 0

local_ates = LocalATEs(
    never_taker=0,
    complier=late_complier,
    # min(1, 1 + late_complier): caps the always-taker effect at 1 (equals 1 here).
    always_taker=np.min((1, 1 + late_complier)),
)

# Instrument with support {0, 1} and pmf (0.5, 0.5).
# NOTE(review): pscores are presumably the propensity scores at each support point
# (0.4 for z=0, 0.6 for z=1) — confirm against the Instrument class.
instrument = Instrument(
    support=np.array([0, 1]),
    pmf=np.array([0.5, 0.5]),
    pscores=np.array([0.4, 0.6]),
)


def bic(n):
    """Return ``sqrt(log(n))``, a BIC-style growth rate in ``n``."""
    log_n = np.log(n)
    return np.sqrt(log_n)


def lil(n):
    """Return ``sqrt(2 * log(log(n)))``, the law-of-the-iterated-logarithm rate."""
    iterated_log = np.log(np.log(n))
    return np.sqrt(2 * iterated_log)

In [None]:
def _d_phi_kink(
    h: float,
    beta_late: float,
    sigma_hat: float,
    kappa_n: float,
    rn: float,
    slope_left: float,
    slope_right: float,
    kink: float = 0,
) -> float:
    """Estimator for the derivative of the identified set."""
    # TODO(@buddejul): Wrifte more general version allowing for different kink point.
    # Currently we do this for a kink at zero; this should show up in the pre-test.

    cond_right = rn * (beta_late - kink) / sigma_hat > kappa_n
    cond_left = rn * (beta_late - kink) / sigma_hat < -kappa_n
    cond_mid = ~cond_right & ~cond_left

    return (
        cond_right * h * slope_right
        + cond_left * h * slope_left
        + cond_mid * ((h < 0) * h * slope_left + (h > 0) * h * slope_right),
        cond_left,
        cond_mid,
        cond_right,
    )


def _ci_analytical_delta_bootstrap(
    n_boot: int,
    data: np.ndarray,
    alpha: float,
    u_hi: float,
    constraint_mtr: str,
    rng: np.random.Generator,
    kappa_fun: Callable = lambda n: n ** (1 / 6),
) -> tuple[float, float]:
    """Compute the analytical delta bootstrap confidence interval.

    Based on Fang and Santos (2017), adapted from Example 2.1 equation (26). kappa_fun
    is the tuning parameter used for the pretest to estimate the derivative.

    """
    n_obs = data.shape[0]

    kappa_n = kappa_fun(n_obs)

    rn = np.sqrt(n_obs)

    # Estimate late using 2SLS to get the standard error for estimating the derivative.
    late, se_late = _late_2sls(data)

    # Note se_late is the finite sample standard error and not the asymptotic variance.
    # Hence, we multiply by rn to get the asymptotic variance.
    sigma_hat = rn * se_late

    pscores = _estimate_pscores(data)

    boot_late_scaled_and_centered = np.zeros(n_boot)

    w = (pscores[1] - pscores[0]) / (u_hi + pscores[1] - pscores[0])

    # Step 1: Bootstrap quantiles for the identified parameter beta_s.
    for i in range(n_boot):
        # Step 1: Draw Z_s from the bootstrap distribution of beta_s.
        boot_data, _ = _draw_bootstrap_data(data=data, n_obs=n_obs, rng=rng)

        boot_late_scaled_and_centered[i] = rn * (_late(boot_data) - late)

    # Step 2: Estimate the derivative.
    # We need two separate derivatives, since the upper and lower bound have different
    # solutions.
    _di_phi = partial(
        _d_phi_kink,
        beta_late=late,
        sigma_hat=sigma_hat,
        kappa_n=kappa_n,
        rn=rn,
    )

    if constraint_mtr == "none":
        d_phi_upper = partial(_di_phi, slope_left=w, slope_right=w)
        d_phi_lower = d_phi_upper

    elif constraint_mtr == "increasing":
        d_phi_upper = partial(
            _d_phi_kink,
            slope_left=1,
            slope_right=w,
            beta_late=late,
            sigma_hat=sigma_hat,
            kappa_n=kappa_n,
            rn=rn,
        )
        d_phi_lower = partial(
            _d_phi_kink,
            slope_left=w,
            slope_right=1,
            beta_late=late,
            sigma_hat=sigma_hat,
            kappa_n=kappa_n,
            rn=rn,
        )

    # Step 3: Apply derivative to bootstrap quantiles to get the confidence interval.
    # In our special case we know the function is monotonically increasing, hence we can
    # compute the bootstrap percentile first and then apply the estimated derivative.
    id_lo, id_hi = _idset(
        b_late=late,
        u_hi=u_hi,
        pscores_hat=pscores,
        constraint_mtr=constraint_mtr,
    )

    _c_1_minus_alpha_half, lo_left, lo_mid, lo_right = d_phi_upper(
        np.quantile(boot_late_scaled_and_centered, 1 - alpha / 2),
    )
    boot_ci_lo = id_lo - _c_1_minus_alpha_half / rn

    _c_alpha_half, hi_left, hi_mid, hi_right = d_phi_lower(
        np.quantile(boot_late_scaled_and_centered, alpha / 2),
    )
    boot_ci_hi = id_hi - _c_alpha_half / rn

    lhs = rn * late / sigma_hat

    return {
        "boot_ci_lo": boot_ci_lo,
        "boot_ci_hi": boot_ci_hi,
        "id_lo": id_lo,
        "id_hi": id_hi,
        "_c_alpha_half": _c_alpha_half,
        "_c_1_minus_alpha_half": _c_1_minus_alpha_half,
        "lo_left": lo_left,
        "lo_mid": lo_mid,
        "lo_right": lo_right,
        "hi_left": hi_left,
        "hi_mid": hi_mid,
        "hi_right": hi_right,
        "kappa_n": kappa_n,
        "lhs": lhs,
        "late": late,
        "se_late": se_late,
        "sigma_hat": sigma_hat,
        "rn": rn,
    }

In [None]:
# Settings for a single debugging run of the analytical delta bootstrap.
n_boot = 500
n_obs = 10_000
alpha = 0.05
u_hi = 0.2
constraint_mtr = "increasing"
rng = RNG

# Simulate one sample. The sample size and generator come from the settings above so
# that changing ``n_obs`` or ``rng`` actually changes the simulated data.
data = _draw_data(n_obs=n_obs, local_ates=local_ates, instrument=instrument, rng=rng)


def kappa_fun(n):
    """Pre-test tuning sequence: ``n`` raised to the power one sixth."""
    exponent = 1 / 6
    return n**exponent

In [None]:
# One run of the local CI routine on the simulated sample; the returned dict of
# debugging quantities is kept in `out` for inspection.
out = _ci_analytical_delta_bootstrap(
    n_boot=n_boot,
    data=data,
    alpha=alpha,
    u_hi=u_hi,
    constraint_mtr=constraint_mtr,
    rng=rng,
    kappa_fun=kappa_fun,
)

In [None]:
def _check_pretests(n_reps: int) -> list[dict]:
    res = []

    n_boot = 2_000
    n_obs = 10_000
    alpha = 0.05
    u_hi = 0.2
    constraint_mtr = "increasing"
    rng = RNG

    def kappa_fun(n):
        return n ** (1 / 2)

    for _ in range(n_reps):
        print(f"Rep: {_}")
        data = _draw_data(
            n_obs=n_obs,
            local_ates=local_ates,
            instrument=instrument,
            rng=RNG,
        )

        out = _ci_analytical_delta_bootstrap(
            n_boot=n_boot,
            data=data,
            alpha=alpha,
            u_hi=u_hi,
            constraint_mtr=constraint_mtr,
            rng=rng,
            kappa_fun=kappa_fun,
        )

        res.append(out)

    return res

In [None]:
# A single replication, as a quick check of the pre-test diagnostics.
res = _check_pretests(1)

# Put results into dataframe, res is list of dicts
# NOTE(review): this rebinds `data`, which an earlier cell uses for the simulated
# sample passed to _ci_analytical_delta_bootstrap; re-running that cell afterwards
# would hand it this DataFrame instead.
data = pd.DataFrame(res)
# NOTE(review): hard-coded value the intervals are checked against — presumably the
# true parameter under the DGP configured above; confirm.
true = 0.5
# Flag replications whose interval [boot_ci_lo, boot_ci_hi] contains `true`.
data["covers"] = (data["boot_ci_lo"] <= true) & (true <= data["boot_ci_hi"])
data.head()

In [None]:
# Column means across replications; the mean of the boolean `covers` column is the
# share of intervals that contain `true`.
data.mean()

In [None]:
# NOTE(review): repeats the LocalATEs / Instrument configuration from the cell near
# the top of the notebook with identical values.
late_complier = 0

local_ates = LocalATEs(
    never_taker=0,
    complier=late_complier,
    # min(1, 1 + late_complier): caps the always-taker effect at 1 (equals 1 here).
    always_taker=np.min((1, 1 + late_complier)),
)

instrument = Instrument(
    support=np.array([0, 1]),
    pmf=np.array([0.5, 0.5]),
    pscores=np.array([0.4, 0.6]),
)


# Run the packaged simulation with the "analytical_delta" bootstrap method, using the
# same sample size, number of bootstrap draws, u_hi, alpha, constraint and kappa rate
# as the single debugging run earlier in the notebook — presumably to compare it with
# the local implementation.
res_sim = simulation_bootstrap(
    n_sims=1,
    n_obs=10_000,
    n_boot=500,
    u_hi=0.2,
    alpha=0.05,
    constraint_mtr="increasing",
    local_ates=local_ates,
    instrument=instrument,
    bootstrap_method="analytical_delta",
    rng=RNG,
    bootstrap_params={"kappa_fun": lambda n: n ** (1 / 6)},
)

In [None]:
# Coverage indicator: True when `true` lies inside [lo, hi].
# assumes res_sim is a DataFrame with columns "lo", "hi" and "true" — TODO confirm
# against simulation_bootstrap.
res_sim["covers"] = (res_sim["lo"] <= res_sim["true"]) & (
    res_sim["true"] <= res_sim["hi"]
)
# Column means; the mean of `covers` is the coverage share across simulations.
res_sim.mean()