diff --git a/examples/placebo_test.pct.py b/examples/placebo_test.pct.py
new file mode 100644
index 0000000..7016483
--- /dev/null
+++ b/examples/placebo_test.pct.py
@@ -0,0 +1,101 @@
+# ---
+# jupyter:
+#   jupytext:
+#     cell_metadata_filter: -all
+#     custom_cell_magics: kql
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.16.4
+#   kernelspec:
+#     display_name: causal-validation
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Placebo Testing
+#
+# A placebo test assesses the validity of a causal model by checking whether the
+# estimated effect can truly be attributed to the treatment, or instead to other
+# spurious factors. The test iterates through the set of control units and, at each
+# iteration, replaces the treated unit with one of the control units before measuring
+# the effect. If the model detects a significant effect, this suggests potential bias
+# or omitted variables in the analysis, indicating that the causal inference is flawed.
+#
+# A successful placebo test shows no statistically significant results, and we may
+# then conclude that the estimated effect can be attributed to the treatment rather
+# than to confounding factors. Conversely, a failed placebo test, which shows
+# significant results, suggests that the identified treatment effect may not be
+# reliable. Placebo testing is thus a critical step in ensuring the robustness of
+# findings in quasi-experimental studies. In this notebook, we demonstrate how a
+# placebo test can be conducted in `causal-validation`.
+
+# %%
+from azcausal.core.error import JackKnife
+from azcausal.estimators.panel.did import DID
+from azcausal.estimators.panel.sdid import SDID
+
+from causal_validation import (
+    Config,
+    simulate,
+)
+from causal_validation.effects import StaticEffect
+from causal_validation.models import AZCausalWrapper
+from causal_validation.plotters import plot
+from causal_validation.transforms import (
+    Periodic,
+    Trend,
+)
+from causal_validation.validation.placebo import PlaceboTest
+
+# %% [markdown]
+# ## Data simulation
+#
+# To demonstrate a placebo test, we must first simulate some data. For the purposes of
+# illustration, we'll simulate a very simple dataset containing 10 control units, where
+# each unit has 60 pre-intervention observations and 30 post-intervention observations.
+
+# %%
+cfg = Config(
+    n_control_units=10,
+    n_pre_intervention_timepoints=60,
+    n_post_intervention_timepoints=30,
+    seed=123,
+)
+
+TRUE_EFFECT = 0.05
+effect = StaticEffect(effect=TRUE_EFFECT)
+data = effect(simulate(cfg))
+plot(data)
+
+# %% [markdown]
+# ## Model
+#
+# We'll now define our model. To do this, we'll use the synthetic
+# difference-in-differences (SDID) implementation from AZCausal. This implementation,
+# along with any other model from AZCausal, can be neatly wrapped up in our
+# `AZCausalWrapper` to make fitting and effect estimation simpler.
+
+# %%
+model = AZCausalWrapper(model=SDID(), error_estimator=JackKnife())
+
+# %% [markdown]
+# ## Placebo Test Results
+#
+# Now that we have a dataset and model defined, we may conduct our placebo test. With 10
+# control units, the test will estimate 10 individual effects: one per control unit when
+# that unit is mocked as the treated group. From those 10 effects, the routine will then
+# report the mean estimated effect, along with the standard deviation across the
+# estimated effects, the effect's standard error, and the p-value corresponding to the
+# null-hypothesis test that the effect is 0.
+#
+# In the output below, we see that the mean estimated effect is small, at just 0.08.
+# Accordingly, the p-value attains a value of 0.5, indicating that we have insufficient
+# evidence to reject the null hypothesis; we therefore have no evidence to suggest
+# that there is bias within this particular setup.
+
+# %%
+result = PlaceboTest(model, data).execute()
+result.summary()
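Note: the p-value reported by `result.summary()` above comes from a one-sample t-test of the placebo effects against zero, implemented in `causal_validation/validation/placebo.py` further down this diff. The following standalone sketch (not part of the diff; the placebo effects are randomly generated placeholders) reproduces that computation:

import numpy as np
from scipy.stats import ttest_1samp

# Placeholder placebo effects, one per control unit (random values for illustration only).
rng = np.random.default_rng(0)
placebo_effects = rng.normal(loc=0.0, scale=0.1, size=10)

mean_effect = np.mean(placebo_effects)
std_error = np.std(placebo_effects) / np.sqrt(len(placebo_effects))

# Two-sided test of the null hypothesis that the mean placebo effect is 0.
p_value = ttest_1samp(placebo_effects, 0, alternative="two-sided").pvalue
print(mean_effect, std_error, p_value)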
diff --git a/pyproject.toml b/pyproject.toml
index f9968f3..0112fd3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
     "matplotlib",
     "numpy",
     "pandas",
+    "pandera"
 ]
 
 [tool.hatch.build]
diff --git a/src/causal_validation/__about__.py b/src/causal_validation/__about__.py
index dacd2b4..f83f980 100644
--- a/src/causal_validation/__about__.py
+++ b/src/causal_validation/__about__.py
@@ -1,3 +1,3 @@
-__version__ = "0.0.3"
+__version__ = "0.0.4"
 
 __all__ = ["__version__"]
diff --git a/src/causal_validation/config.py b/src/causal_validation/config.py
index 554206f..8040be1 100644
--- a/src/causal_validation/config.py
+++ b/src/causal_validation/config.py
@@ -3,7 +3,6 @@
     field,
 )
 import datetime as dt
-import typing as tp
 
 import numpy as np
 
diff --git a/src/causal_validation/models.py b/src/causal_validation/models.py
new file mode 100644
index 0000000..5b81fb4
--- /dev/null
+++ b/src/causal_validation/models.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+import typing as tp
+
+from azcausal.core.error import Error
+from azcausal.core.estimator import Estimator
+from azcausal.core.result import Result
+
+from causal_validation.data import Dataset
+
+
+@dataclass
+class AZCausalWrapper:
+    model: Estimator
+    error_estimator: tp.Optional[Error] = None
+
+    def __call__(self, data: Dataset, **kwargs) -> Result:
+        panel = data.to_azcausal()
+        result = self.model.fit(panel, **kwargs)
+        if self.error_estimator:
+            self.model.error(result, self.error_estimator)
+        return result
diff --git a/src/causal_validation/validation/__init__.py b/src/causal_validation/validation/__init__.py
new file mode 100644
index 0000000..e69de29
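Note: `AZCausalWrapper` above reduces fitting and (optional) error estimation to a single call. A minimal usage sketch, assuming data produced by the package's `simulate` helper (illustrative only, not part of this diff):

from azcausal.core.error import JackKnife
from azcausal.estimators.panel.sdid import SDID

from causal_validation import Config, simulate
from causal_validation.models import AZCausalWrapper

# Wrap an AZCausal estimator together with an optional error estimator.
model = AZCausalWrapper(model=SDID(), error_estimator=JackKnife())

# Any Dataset exposing `to_azcausal()` can be passed straight to the wrapper.
cfg = Config(
    n_control_units=5,
    n_pre_intervention_timepoints=30,
    n_post_intervention_timepoints=15,
    seed=1,
)
data = simulate(cfg)
result = model(data)  # an azcausal Result
print(result.effect.percentage())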
diff --git a/src/causal_validation/validation/placebo.py b/src/causal_validation/validation/placebo.py
new file mode 100644
index 0000000..f12e2eb
--- /dev/null
+++ b/src/causal_validation/validation/placebo.py
@@ -0,0 +1,65 @@
+from dataclasses import dataclass
+import typing as tp
+
+from azcausal.core.effect import Effect
+import numpy as np
+import pandas as pd
+from pandera import (
+    Check,
+    Column,
+    DataFrameSchema,
+)
+from scipy.stats import ttest_1samp
+from tqdm import trange
+
+from causal_validation.data import Dataset
+from causal_validation.models import AZCausalWrapper
+
+PlaceboSchema = DataFrameSchema(
+    {
+        "Effect": Column(float, coerce=True),
+        "Standard Deviation": Column(
+            float, checks=[Check.greater_than(0.0)], coerce=True
+        ),
+        "Standard Error": Column(float, checks=[Check.greater_than(0.0)], coerce=True),
+        "p-value": Column(float, coerce=True),
+    }
+)
+
+
+@dataclass
+class PlaceboTestResult:
+    effects: tp.List[Effect]
+
+    def summary(self) -> pd.DataFrame:
+        _effects = [effect.value for effect in self.effects]
+        _n_effects = len(_effects)
+        expected_effect = np.mean(_effects)
+        stddev_effect = np.std(_effects)
+        std_error = stddev_effect / np.sqrt(_n_effects)
+        p_value = ttest_1samp(_effects, 0, alternative="two-sided").pvalue
+        result = {
+            "Effect": expected_effect,
+            "Standard Deviation": stddev_effect,
+            "Standard Error": std_error,
+            "p-value": p_value,
+        }
+        result_df = pd.DataFrame([result])
+        PlaceboSchema.validate(result_df)
+        return result_df
+
+
+@dataclass
+class PlaceboTest:
+    model: AZCausalWrapper
+    dataset: Dataset
+
+    def execute(self) -> PlaceboTestResult:
+        n_control_units = self.dataset.n_units
+        results = []
+        for i in trange(n_control_units):
+            placebo_data = self.dataset.to_placebo_data(i)
+            result = self.model(placebo_data)
+            result = result.effect.percentage()
+            results.append(result)
+        return PlaceboTestResult(effects=results)
diff --git a/tests/test_causal_validation/test_models.py b/tests/test_causal_validation/test_models.py
new file mode 100644
index 0000000..feeb1ce
--- /dev/null
+++ b/tests/test_causal_validation/test_models.py
@@ -0,0 +1,63 @@
+import typing as tp
+
+from azcausal.core.effect import Effect
+from azcausal.core.error import (
+    Bootstrap,
+    Error,
+    JackKnife,
+)
+from azcausal.core.estimator import Estimator
+from azcausal.core.result import Result
+from azcausal.estimators.panel import (
+    did,
+    sdid,
+)
+from hypothesis import (
+    given,
+    settings,
+    strategies as st,
+)
+import numpy as np
+
+from causal_validation.models import AZCausalWrapper
+from causal_validation.testing import (
+    TestConstants,
+    simulate_data,
+)
+
+MODELS = [did.DID(), sdid.SDID()]
+MODEL_ERROR = [
+    (did.DID(), None),
+    (sdid.SDID(), None),
+    (sdid.SDID(), Bootstrap()),
+    (sdid.SDID(), JackKnife()),
+]
+
+
+@given(
+    model_error=st.sampled_from(MODEL_ERROR),
+    n_control=st.integers(min_value=2, max_value=5),
+    n_pre_treatment=st.integers(min_value=1, max_value=50),
+    n_post_treatment=st.integers(min_value=1, max_value=50),
+    seed=st.integers(min_value=1, max_value=100),
+)
+@settings(max_examples=10)
+def test_call(
+    model_error: tp.Tuple[Estimator, tp.Optional[Error]],
+    n_control: int,
+    n_pre_treatment: int,
+    n_post_treatment: int,
+    seed: int,
+):
+    constants = TestConstants(
+        N_CONTROL=n_control,
+        N_PRE_TREATMENT=n_pre_treatment,
+        N_POST_TREATMENT=n_post_treatment,
+    )
+    data = simulate_data(global_mean=10.0, seed=seed, constants=constants)
+    model = AZCausalWrapper(*model_error)
+    result = model(data)
+
+    assert isinstance(result, Result)
+    assert isinstance(result.effect, Effect)
+    assert not np.isnan(result.effect.value)
diff --git a/tests/test_causal_validation/test_plotters.py b/tests/test_causal_validation/test_plotters.py
index fcdd286..a034fde 100644
--- a/tests/test_causal_validation/test_plotters.py
+++ b/tests/test_causal_validation/test_plotters.py
@@ -30,12 +30,12 @@
 
 
 @given(
-    n_control=st.integers(min_value=1, max_value=50),
+    n_control=st.integers(min_value=1, max_value=10),
     n_pre_treatment=st.integers(min_value=1, max_value=50),
     n_post_treatment=st.integers(min_value=1, max_value=50),
     ax_bool=st.booleans(),
 )
-@settings(max_examples=5)
+@settings(max_examples=10)
 def test_plot(
     n_control: int, n_pre_treatment: int, n_post_treatment: int, ax_bool: bool
 ):
@@ -47,7 +47,9 @@ def test_plot(
     constants = TestConstants(
         N_CONTROL=n_control,
         N_PRE_TREATMENT=n_pre_treatment,
         N_POST_TREATMENT=n_post_treatment,
     )
     data = simulate_data(0.0, DEFAULT_SEED, constants=constants)
     if ax_bool:
         _, ax = plt.subplots()
-    ax = plot(data)
+    else:
+        ax = None
+    ax = plot(data, ax=ax)
     assert isinstance(ax, Axes)
     assert len(ax.lines) == n_control + 2
     assert ax.get_legend() is not None
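Note: `PlaceboSchema` in `placebo.py` declares every column with `coerce=True`, so numeric summaries are cast to float before the checks run; this is the behaviour exercised by `test_schema_coerce` in the new test module below. A small standalone pandera sketch of the same idea (illustrative only, not part of this diff):

import pandas as pd
from pandera import Check, Column, DataFrameSchema

schema = DataFrameSchema(
    {
        "Effect": Column(float, coerce=True),
        "Standard Error": Column(float, checks=[Check.greater_than(0.0)], coerce=True),
    }
)

# Integer inputs are coerced to float before validation, so this passes.
df = pd.DataFrame({"Effect": [1], "Standard Error": [2]})
validated = schema.validate(df)
print(validated.dtypes)  # both columns are now float64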
diff --git a/tests/test_causal_validation/test_validation/test_placebo.py b/tests/test_causal_validation/test_validation/test_placebo.py
new file mode 100644
index 0000000..12557b0
--- /dev/null
+++ b/tests/test_causal_validation/test_validation/test_placebo.py
@@ -0,0 +1,64 @@
+import typing as tp
+
+from azcausal.estimators.panel.did import DID
+from azcausal.estimators.panel.sdid import SDID
+from hypothesis import (
+    given,
+    settings,
+    strategies as st,
+)
+import numpy as np
+import pandas as pd
+import pytest
+
+from causal_validation.models import AZCausalWrapper
+from causal_validation.testing import (
+    TestConstants,
+    simulate_data,
+)
+from causal_validation.transforms import Trend
+from causal_validation.validation.placebo import (
+    PlaceboSchema,
+    PlaceboTest,
+    PlaceboTestResult,
+)
+
+
+def test_schema_coerce():
+    df = PlaceboSchema.example()
+    cols = df.columns
+    for col in cols:
+        df[col] = np.ceil(df[col])
+    PlaceboSchema.validate(df)
+
+
+@given(
+    global_mean=st.floats(min_value=0.0, max_value=10.0),
+    seed=st.integers(min_value=0, max_value=1000000),
+    n_control=st.integers(min_value=10, max_value=20),
+    model=st.sampled_from([DID(), SDID()]),
+)
+@settings(max_examples=10)
+def test_placebo_test(
+    global_mean: float, seed: int, n_control: int, model: tp.Union[DID, SDID]
+):
+    # Simulate data with a linear trend
+    constants = TestConstants(N_CONTROL=n_control, GLOBAL_SCALE=0.001)
+    data = simulate_data(global_mean=global_mean, seed=seed, constants=constants)
+    trend_term = Trend(degree=1, coefficient=0.1)
+    data = trend_term(data)
+
+    # Execute the placebo test
+    model = AZCausalWrapper(model)
+    result = PlaceboTest(model, data).execute()
+
+    # Check the structure of the result
+    assert isinstance(result, PlaceboTestResult)
+    assert len(result.effects) == n_control
+
+    # Check that the results are close to the true (null) effect
+    summary = result.summary()
+    PlaceboSchema.validate(summary)
+    assert isinstance(summary, pd.DataFrame)
+    assert summary.shape == (1, 4)
+    assert summary["Effect"].iloc[0] == pytest.approx(0.0, abs=0.1)
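Note: `PlaceboTest` is estimator-agnostic; any AZCausal estimator accepted by `AZCausalWrapper` can be validated in the same way. A quick end-to-end check with plain difference-in-differences, mirroring the set-up used in the tests above (illustrative only, not part of this diff):

from azcausal.estimators.panel.did import DID

from causal_validation import Config, simulate
from causal_validation.models import AZCausalWrapper
from causal_validation.validation.placebo import PlaceboTest

cfg = Config(
    n_control_units=10,
    n_pre_intervention_timepoints=60,
    n_post_intervention_timepoints=30,
    seed=123,
)
data = simulate(cfg)

model = AZCausalWrapper(model=DID())
result = PlaceboTest(model, data).execute()

# One-row summary: Effect, Standard Deviation, Standard Error, p-value.
print(result.summary())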