diff --git a/pyproject.toml b/pyproject.toml index 4ea4544..f9968f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] +show_missing = true [tool.black] line-length = 88 diff --git a/src/causal_validation/__about__.py b/src/causal_validation/__about__.py index 6bfcf97..dacd2b4 100644 --- a/src/causal_validation/__about__.py +++ b/src/causal_validation/__about__.py @@ -1,3 +1,3 @@ -__version__ = "0.0.2" +__version__ = "0.0.3" __all__ = ["__version__"] diff --git a/src/causal_validation/config.py b/src/causal_validation/config.py index 901b0f2..554206f 100644 --- a/src/causal_validation/config.py +++ b/src/causal_validation/config.py @@ -7,11 +7,12 @@ import numpy as np +from causal_validation.types import ( + Number, + WeightTypes, +) from causal_validation.weights import UniformWeights -if tp.TYPE_CHECKING: - from causal_validation.types import WeightTypes - @dataclass(kw_only=True, frozen=True) class WeightConfig: @@ -23,8 +24,8 @@ class Config: n_control_units: int n_pre_intervention_timepoints: int n_post_intervention_timepoints: int - global_mean: float = 20.0 - global_scale: float = 0.2 + global_mean: Number = 20.0 + global_scale: Number = 0.2 start_date: dt.date = dt.date(year=2023, month=1, day=1) seed: int = 123 weights_cfg: WeightConfig = field(default_factory=WeightConfig) diff --git a/src/causal_validation/data.py b/src/causal_validation/data.py index 202cb63..21079d1 100644 --- a/src/causal_validation/data.py +++ b/src/causal_validation/data.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import deepcopy from dataclasses import dataclass import datetime as dt @@ -26,7 +28,9 @@ class Dataset: _start_date: dt.date counterfactual: tp.Optional[Float[np.ndarray, "M 1"]] = None - def to_df(self, index_start: str = "2023-01-01") -> pd.DataFrame: + def to_df( + self, index_start: str = dt.date(year=2023, month=1, day=1) + ) -> pd.DataFrame: inputs = np.vstack([self.Xtr, self.Xte]) outputs = np.vstack([self.ytr, self.yte]) data = np.hstack([outputs, inputs]) @@ -54,11 +58,11 @@ def n_timepoints(self) -> int: return self.n_post_intervention + self.n_pre_intervention @property - def control_units(self) -> Float[np.ndarray, "N+M 1"]: + def control_units(self) -> Float[np.ndarray, "{self.n_timepoints} {self.n_units}"]: return np.vstack([self.Xtr, self.Xte]) @property - def treated_units(self) -> Float[np.ndarray, "N+M 1"]: + def treated_units(self) -> Float[np.ndarray, "{self.n_timepoints} 1"]: return np.vstack([self.ytr, self.yte]) @property @@ -94,24 +98,37 @@ def _get_columns(self) -> tp.List[str]: colnames = ["T"] + [f"C{i}" for i in range(self.n_units)] return colnames - def _get_index(self, start_date: str) -> pd.Series: + def _get_index(self, start_date: dt.date) -> DatetimeIndex: return pd.date_range(start=start_date, freq="D", periods=self.n_timepoints) def _get_indicator(self) -> Integer[np.ndarray, "N 1"]: indicator = np.vstack( [ - np.zeros(shape=(self.n_pre_intervention, 1)), - np.ones(shape=(self.n_post_intervention, 1)), + np.zeros(shape=(self.n_pre_intervention, 1)).astype(np.int64), + np.ones(shape=(self.n_post_intervention, 1)).astype(np.int64), ] ) return indicator - def inflate(self, inflation_vals: Float[np.ndarray, "M 1"]) -> "Dataset": + def inflate(self, inflation_vals: Float[np.ndarray, "M 1"]) -> Dataset: Xtr, ytr = [deepcopy(i) for i in self.pre_intervention_obs] Xte, yte = [deepcopy(i) for i in self.post_intervention_obs] inflated_yte = yte * inflation_vals return Dataset(Xtr, Xte, ytr, inflated_yte, self._start_date, yte) + def __eq__(self, other: Dataset) -> bool: + ytr = np.allclose(self.ytr, other.ytr) + yte = np.allclose(self.yte, other.yte) + if self.Xtr.shape == other.Xtr.shape: + xtr = np.allclose(self.Xtr, other.Xtr) + else: + xtr = False + if self.Xte.shape == other.Xte.shape: + xte = np.allclose(self.Xte, other.Xte) + else: + xte = False + return all([xtr, ytr, xte, yte]) + def to_azcausal(self): time_index = np.arange(self.n_timepoints) data = self.to_df().assign(time=time_index).melt(id_vars=["time", "treated"]) @@ -128,3 +145,25 @@ def to_azcausal(self): @property def _slots(self) -> tp.Dict[str, int]: return {"n_units": self.n_units + 1, "n_timepoints": self.n_timepoints} + + def drop_unit(self, idx: int) -> Dataset: + Xtr = np.delete(self.Xtr, [idx], axis=1) + Xte = np.delete(self.Xte, [idx], axis=1) + return Dataset( + Xtr, Xte, self.ytr, self.yte, self._start_date, self.counterfactual + ) + + def to_placebo_data(self, to_treat_idx: int) -> Dataset: + ytr = self.Xtr[:, to_treat_idx].reshape(-1, 1) + yte = self.Xte[:, to_treat_idx].reshape(-1, 1) + dropped_data = self.drop_unit(to_treat_idx) + placebo_data = reassign_treatment(dropped_data, ytr, yte) + return placebo_data + + +def reassign_treatment( + data: Dataset, ytr: Float[np.ndarray, "N 1"], yte: Float[np.ndarray, "M 1"] +) -> Dataset: + Xtr = data.Xtr + Xte = data.Xte + return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual) diff --git a/src/causal_validation/types.py b/src/causal_validation/types.py index 34f5104..dd7e124 100644 --- a/src/causal_validation/types.py +++ b/src/causal_validation/types.py @@ -9,3 +9,4 @@ WeightTypes = tp.Literal["uniform", "non-uniform"] InterventionTypes = tp.Literal["pre-intervention", "post-intervention", "both"] RandomVariable = tp.Union[rv_continuous, rv_discrete] +Number = tp.Union[float, int] diff --git a/src/causal_validation/weights.py b/src/causal_validation/weights.py index f108a7d..42234f9 100644 --- a/src/causal_validation/weights.py +++ b/src/causal_validation/weights.py @@ -16,10 +16,10 @@ class AbstractWeights(BaseObject): name: str = "Abstract Weights" - def _get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, " D"]: + def _get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, "D 1"]: raise NotImplementedError("Please implement `_get_weights` in all subclasses.") - def get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, " D"]: + def get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, "D 1"]: weights = self._get_weights(obs) np.testing.assert_almost_equal( @@ -42,7 +42,7 @@ def weight_obs(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, "N 1"]: class UniformWeights(AbstractWeights): name: str = "Uniform Weights" - def _get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, " D"]: + def _get_weights(self, obs: Float[np.ndarray, "N D"]) -> Float[np.ndarray, "D 1"]: n_units = obs.shape[1] return np.repeat(1.0 / n_units, repeats=n_units).reshape(-1, 1) diff --git a/tests/conftest.py b/tests/conftest.py index 3a3512e..ef34936 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,9 @@ from hypothesis import settings +from jaxtyping import install_import_hook settings.register_profile( "causal_validation", database=None, max_examples=10, deadline=None ) settings.load_profile("causal_validation") +with install_import_hook("causal_validation", "beartype.beartype"): + import causal_validation # noqa: F401 diff --git a/tests/test_causal_validation/test_amzn_synthetic_causal_data_gen.py b/tests/test_causal_validation/test_amzn_synthetic_causal_data_gen.py deleted file mode 100644 index a244514..0000000 --- a/tests/test_causal_validation/test_amzn_synthetic_causal_data_gen.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_causal_validation_importable(): - assert True diff --git a/tests/test_causal_validation/test_data.py b/tests/test_causal_validation/test_data.py index fd79430..301ec86 100644 --- a/tests/test_causal_validation/test_data.py +++ b/tests/test_causal_validation/test_data.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from azcausal.estimators.panel.did import DID from hypothesis import ( given, @@ -7,8 +9,12 @@ import numpy as np import pandas as pd from pandas.core.indexes.datetimes import DatetimeIndex +import pytest -from causal_validation.data import Dataset +from causal_validation.data import ( + Dataset, + reassign_treatment, +) from causal_validation.testing import ( TestConstants, simulate_data, @@ -166,3 +172,90 @@ def test_get_index(n_post_treatment: int, n_pre_treatment: int, idx: Interventio assert len(idx_vals) == n_post_treatment elif idx == "pre-intervention": assert len(idx_vals) == n_pre_treatment + + +@pytest.mark.parametrize("n_pre, n_post, n_control", [(60, 30, 10), (60, 30, 20)]) +def test_drop_unit(n_pre: int, n_post: int, n_control: int): + constants = TestConstants( + N_POST_TREATMENT=n_post, + N_PRE_TREATMENT=n_pre, + N_CONTROL=n_control, + ) + data = simulate_data(0.0, DEFAULT_SEED, constants=constants) + desired_shape_Xtr = (n_pre, n_control - 1) + desired_shape_Xte = (n_post, n_control - 1) + desired_shape_ytr = (n_pre, 1) + desired_shape_yte = (n_post, 1) + + for i in range(n_control): + reduced_data = data.drop_unit(i) + assert reduced_data.Xtr.shape == desired_shape_Xtr + assert reduced_data.Xte.shape == desired_shape_Xte + assert reduced_data.ytr.shape == desired_shape_ytr + assert reduced_data.yte.shape == desired_shape_yte + + +@pytest.mark.parametrize("n_pre, n_post, n_control", [(60, 30, 10), (60, 30, 20)]) +def test_to_placebo(n_pre: int, n_post: int, n_control: int): + constants = TestConstants( + N_POST_TREATMENT=n_post, + N_PRE_TREATMENT=n_pre, + N_CONTROL=n_control, + ) + data = simulate_data(0.0, DEFAULT_SEED, constants=constants) + desired_shape_Xtr = (n_pre, n_control - 1) + desired_shape_Xte = (n_post, n_control - 1) + desired_shape_ytr = (n_pre, 1) + desired_shape_yte = (n_post, 1) + + for i in range(n_control): + placebo_data = data.to_placebo_data(i) + assert placebo_data.Xtr.shape == desired_shape_Xtr + assert placebo_data.Xte.shape == desired_shape_Xte + assert placebo_data.ytr.shape == desired_shape_ytr + assert placebo_data.yte.shape == desired_shape_yte + assert not data == placebo_data + + +@given( + n_control=st.integers(min_value=2, max_value=50), + n_pre_treatment=st.integers(min_value=10, max_value=50), + n_post_treatment=st.integers(min_value=10, max_value=50), + global_mean=st.floats( + min_value=-5.0, max_value=5.0, allow_infinity=False, allow_nan=False + ), +) +@settings(max_examples=10) +def test_eq( + n_control: int, n_pre_treatment: int, n_post_treatment: int, global_mean: float +): + constants = TestConstants( + N_POST_TREATMENT=n_post_treatment, + N_PRE_TREATMENT=n_pre_treatment, + N_CONTROL=n_control, + ) + data = simulate_data(global_mean, DEFAULT_SEED, constants=constants) + copied_data = deepcopy(data) + assert data == copied_data + + # Shape mismatch + for i in range(n_control): + reduced_data = data.drop_unit(i) + assert not data == reduced_data + + +@pytest.mark.parametrize("n_pre, n_post, n_control", [(60, 30, 10), (60, 30, 20)]) +def test_reassign_treatment(n_pre: int, n_post: int, n_control: int): + constants = TestConstants( + N_POST_TREATMENT=n_post, + N_PRE_TREATMENT=n_pre, + N_CONTROL=n_control, + ) + data = simulate_data(0.0, DEFAULT_SEED, constants=constants) + to_assign_ytr = np.ones(shape=(n_pre, 1)) + to_assign_yte = np.ones(shape=(n_post, 1)) + + reassigned_data = reassign_treatment(data, to_assign_ytr, to_assign_yte) + assert not data == reassigned_data + np.testing.assert_equal(reassigned_data.ytr, to_assign_ytr) + np.testing.assert_equal(reassigned_data.yte, to_assign_yte)