# 04 — Confidence Intervals and HCOPE

This notebook demonstrates bootstrap confidence intervals and a high-confidence
lower bound (HCOPE, high-confidence off-policy evaluation) for off-policy
evaluation (OPE) in a bandit setting.

## Setup

```
pip install ".[plots]"
```

In [1]:
from __future__ import annotations

import matplotlib.pyplot as plt
import numpy as np

from crl.assumptions import AssumptionSet
from crl.assumptions_catalog import BOUNDED_REWARDS, OVERLAP, SEQUENTIAL_IGNORABILITY
from crl.benchmarks.bandit_synth import SyntheticBandit, SyntheticBanditConfig
from crl.estimands.policy_value import PolicyValueEstimand
from crl.estimators.bootstrap import BootstrapConfig, bootstrap_ci
from crl.estimators.high_confidence import (
    HighConfidenceConfig,
    HighConfidenceISEstimator,
)
from crl.estimators.importance_sampling import ISEstimator
from crl.utils.seeding import set_seed

In [2]:
# Use one seed value for both the project's seeding helper and NumPy's
# legacy global RNG. The helper is called first, as before.
SEED = 0
set_seed(SEED)
np.random.seed(SEED)

## Bootstrap CI for IS

In [3]:
# Draw 1,000 logged samples from the synthetic bandit benchmark.
bandit_config = SyntheticBanditConfig(seed=0)
benchmark = SyntheticBandit(bandit_config)
dataset = benchmark.sample(num_samples=1_000, seed=1)

# Estimand: value of the benchmark's target policy over a single step
# (horizon=1, discount=1.0), with the assumptions it relies on declared.
ope_assumptions = AssumptionSet(
    [SEQUENTIAL_IGNORABILITY, OVERLAP, BOUNDED_REWARDS]
)
estimand = PolicyValueEstimand(
    policy=benchmark.target_policy,
    discount=1.0,
    horizon=1,
    assumptions=ope_assumptions,
)


def make_is_estimator():
    """Return a newly constructed IS estimator for ``estimand``."""
    return ISEstimator(estimand)


is_estimator = make_is_estimator()

# bootstrap_ci takes an estimator factory rather than an instance.
# NOTE(review): presumably it builds one estimator per resample, and
# alpha=0.05 yields a 95% interval — confirm against crl's bootstrap docs.
bootstrap_cfg = BootstrapConfig(
    num_bootstrap=200,
    method="trajectory",
    alpha=0.05,
    seed=0,
)
stderr, ci = bootstrap_ci(make_is_estimator, dataset, bootstrap_cfg)
(stderr, ci)

(0.039024088833448496, (-0.47470804827970114, -0.31666408191960055))

In [None]:
# Point estimate on the full dataset, printed next to the bootstrap interval.
is_report = is_estimator.estimate(dataset)
ci_lower, ci_upper = ci[0], ci[1]
is_summary = f"{is_report.value:.3f} | bootstrap CI=({ci_lower:.3f}, {ci_upper:.3f})"
print("IS estimate: " + is_summary)

## High-confidence lower bound (HCOPE)

HCOPE produces a lower bound that holds with probability at least `1 - delta`
under bounded rewards.

In [4]:
# No explicit config is passed here, so the estimator uses its defaults.
hcope_estimator = HighConfidenceISEstimator(estimand)
hcope_report = hcope_estimator.estimate(dataset)
(hcope_report.value, hcope_report.ci)

(np.float64(-0.5231992066402067),
 (np.float64(-0.5231992066402067), -0.39309521582481))

In [None]:
# The report's value is the lower bound; ci[1] is the upper end it reports.
hcope_lower = hcope_report.value
hcope_upper = hcope_report.ci[1]
hcope_summary = f"{hcope_lower:.3f} | implied upper={hcope_upper:.3f}"
print("HCOPE lower bound: " + hcope_summary)

## Explicit HCOPE configuration

When you know a reward bound, pass it explicitly for tighter, more reliable bounds.

In [5]:
# Explicit settings: delta=0.1 and a known reward bound of 2.0.
# NOTE(review): whether reward_bound is a bound on |r| or an upper limit of a
# [0, bound] range is not visible here — confirm against HighConfidenceConfig.
hcope_config = HighConfidenceConfig(delta=0.1, reward_bound=2.0)
hcope_estimator_cfg = HighConfidenceISEstimator(estimand, config=hcope_config)
hcope_report_cfg = hcope_estimator_cfg.estimate(dataset)
hcope_report_cfg.to_dataframe()

Unnamed: 0,value,stderr,ci,diagnostics,assumptions_checked,assumptions_flagged,warnings,metadata,lower_bound,upper_bound
0,-0.501833,,"(-0.5018331065610694, -0.39309521582481)",{'overlap': {'min_behavior_prob': 0.1097201314...,"[sequential_ignorability, overlap, bounded_rew...",[],[],"{'estimator': 'HCOPE', 'delta': 0.1, 'reward_b...",-0.501833,-0.393095


## Visual comparison

Compare a two-sided bootstrap CI with a one-sided high-confidence lower bound.

In [None]:
fig, ax = plt.subplots(figsize=(4.2, 2.6))

# errorbar takes distances from the marker (below, above), not absolute
# endpoints, so convert each interval to offsets from its point.
is_value = is_report.value
is_offsets = [[is_value - ci[0]], [ci[1] - is_value]]

# HCOPE is one-sided: the marker sits on the lower bound, so there is no
# downward bar and the upward bar reaches the report's upper end.
hcope_value = hcope_report.value
hcope_offsets = [[0.0], [hcope_report.ci[1] - hcope_value]]

marker_style = dict(fmt="o", capsize=4)
ax.errorbar(
    [0],
    [is_value],
    yerr=is_offsets,
    color="tab:blue",
    label="IS (bootstrap CI)",
    **marker_style,
)
ax.errorbar(
    [1],
    [hcope_value],
    yerr=hcope_offsets,
    color="tab:orange",
    label="HCOPE (lower bound)",
    **marker_style,
)

ax.set_xticks([0, 1], ["IS", "HCOPE"])
ax.set_ylabel("Estimated policy value")
ax.set_title("CI vs. high-confidence bound")
ax.legend(frameon=False)
fig.tight_layout()
fig

## Takeaways

- Bootstrap CIs provide a general uncertainty estimate.
- HCOPE yields a conservative lower bound with explicit guarantees.