Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 129 additions & 21 deletions docs/examples/azcausal.ipynb

Large diffs are not rendered by default.

192 changes: 155 additions & 37 deletions docs/examples/basic.ipynb

Large diffs are not rendered by default.

204 changes: 182 additions & 22 deletions docs/examples/placebo_test.ipynb

Large diffs are not rendered by default.

16 changes: 10 additions & 6 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ trend varies across each of the 10 control units.
from causal_validation import Config, simulate
from causal_validation.effects import StaticEffect
from causal_validation.plotters import plot
from causal_validation.transforms import Trend, Periodic
from causal_validation.transforms import Trend
from causal_validation.transforms.parameter import UnitVaryingParameter
import numpy as np
from scipy.stats import norm

cfg = Config(
n_control_units=10,
n_pre_intervention_timepoints=60,
n_post_intervention_timepoints=30,
)
# Treatment assignment matrix
D = np.zeros((90, 11)) # 90 time points, 11 units
D[60:, -1] = 1 # Last unit treated after 60 time points

cfg = Config(treatment_assignments=D)

# Simulate the base observation
base_data = simulate(cfg)
Expand All @@ -38,6 +39,8 @@ trended_data = trend_component(base_data)
# Simulate a 5% lift in the treated unit's post-intervention data
effect = StaticEffect(0.05)
inflated_data = effect(trended_data)

plot(inflated_data)
```

![Gaussian process posterior.](static/imgs/readme_fig.png)
Expand All @@ -50,6 +53,7 @@ combination with AZCausal by the following.

```python
from azcausal.estimators.panel.sdid import SDID
from causal_validation.estimator.utils import AZCausalWrapper
from causal_validation.validation.placebo import PlaceboTest

model = AZCausalWrapper(model=SDID())
Expand Down
86 changes: 50 additions & 36 deletions src/causal_validation/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from dataclasses import (
dataclass,
field,
)
import datetime as dt
import typing as tp
Expand All @@ -11,84 +10,99 @@

from causal_validation.types import (
Number,
WeightTypes,
TreatedSimulationTypes,
)
from causal_validation.weights import UniformWeights


@dataclass(kw_only=True, frozen=True)
class WeightConfig:
weight_type: "WeightTypes" = field(default_factory=UniformWeights)


@dataclass(kw_only=True)
class Config:
"""Configuration for causal data generation.

Args:
n_control_units (int): Number of control units in the synthetic dataset.
n_pre_intervention_timepoints (int): Number of time points before intervention.
n_post_intervention_timepoints (int): Number of time points after intervention.
treatment_assignments (Float[np.ndarray, "T N"]): Treatment assignments for T
time steps and N units. Only supported with binary assignments.
treated_simulation_type ("TreatedSimulationTypes"): Treated units can be
simulated either "independent" of control units or "control-weighted",
where the weighting scheme is controlled by the Dirichlet concentration parameter.
Set to "control-weighted" by default.
dirichlet_concentration (Number): Dirichlet parameters are set to a vector of
dirichlet_concentration with length number of control units. This parameter
controls how dense or sparse the generated weights are. Set to 1 by default
and in effect only if treated_simulation_type is "control-weighted".
n_covariates (Optional[int]): Number of covariates. Defaults to None.
covariate_means (Optional[Float[np.ndarray, "D K"]]): Mean values for covariates
D is n_control_units and K is n_covariates. Defaults to None. If it is set
to None while n_covariates is provided, covariate_means will be generated
covariate_means (Optional[np.ndarray]): Normal dist. mean values for covariates.
The length must be n_covariates. Defaults to None. If it is set to
None while n_covariates is provided, covariate_means will be generated
randomly from Normal distribution.
covariate_stds (Optional[Float[np.ndarray, "D K"]]): Standard deviations for
covariates. D is n_control_units and K is n_covariates. Defaults to None.
If it is set to None while n_covariates is provided, covariate_stds
will be generated randomly from Half-Cauchy distribution.
covariate_stds (Optional[np.ndarray]): Normal dist. std values for covariates.
The length must be n_covariates. Defaults to None. If it is set to
None while n_covariates is provided, covariate_stds will be generated
randomly from Half-Cauchy distribution.
covariate_coeffs (Optional[np.ndarray]): Linear regression
coefficients to map covariates to output observations. K is n_covariates.
Defaults to None.
global_mean (Number): Global mean for data generation. Defaults to 20.0.
global_scale (Number): Global scale for data generation. Defaults to 0.2.
start_date (dt.date): Start date for time series. Defaults to 2023-01-01.
seed (int): Random seed for reproducibility. Defaults to 123.
weights_cfg (WeightConfig): Configuration for unit weights. Defaults to
UniformWeights.
weights (Optional[list[np.ndarray]]): List of weight vectors, one per
treated unit. Each element has length equal to the number of control
units and indicates how to weight the control units when generating
that treated unit.
"""

n_control_units: int
n_pre_intervention_timepoints: int
n_post_intervention_timepoints: int
treatment_assignments: Float[np.ndarray, "T N"]
treated_simulation_type: "TreatedSimulationTypes" = "control-weighted"
dirichlet_concentration: Number = 1.0
n_covariates: tp.Optional[int] = None
covariate_means: tp.Optional[Float[np.ndarray, "D K"]] = None
covariate_stds: tp.Optional[Float[np.ndarray, "D K"]] = None
covariate_means: tp.Optional[np.ndarray] = None
covariate_stds: tp.Optional[np.ndarray] = None
covariate_coeffs: tp.Optional[np.ndarray] = None
global_mean: Number = 20.0
global_scale: Number = 0.2
start_date: dt.date = dt.date(year=2023, month=1, day=1)
seed: int = 123
weights_cfg: WeightConfig = field(default_factory=WeightConfig)
weights: tp.Optional[list[np.ndarray]] = None

def __post_init__(self):
self.rng = np.random.RandomState(self.seed)
if self.covariate_means is not None:
assert self.covariate_means.shape == (
self.n_control_units,
self.n_covariates,
)
assert self.covariate_means.shape == (self.n_covariates,)

if self.covariate_stds is not None:
assert self.covariate_stds.shape == (
self.n_control_units,
self.n_covariates,
)
assert self.covariate_stds.shape == (self.n_covariates,)

if (self.n_covariates is not None) & (self.covariate_means is None):
self.covariate_means = self.rng.normal(
loc=0.0, scale=5.0, size=(self.n_control_units, self.n_covariates)
loc=0.0, scale=5.0, size=(self.n_covariates)
)

if (self.n_covariates is not None) & (self.covariate_stds is None):
self.covariate_stds = halfcauchy.rvs(
scale=0.5,
size=(self.n_control_units, self.n_covariates),
size=(self.n_covariates),
random_state=self.rng,
)

if (self.n_covariates is not None) & (self.covariate_coeffs is None):
self.covariate_coeffs = self.rng.normal(
loc=0.0, scale=5.0, size=self.n_covariates
)

n_units = self.treatment_assignments.shape[1]
treated_units = [
i for i in range(n_units) if any(self.treatment_assignments[:, i] != 0)
]
n_treated_units = len(treated_units)
n_control_units = n_units - n_treated_units

if self.treated_simulation_type == "control-weighted":
if self.weights is None:
self.weights = [
self.rng.dirichlet(
self.dirichlet_concentration * np.ones(n_control_units)
)
for _ in range(n_treated_units)
]
else:
assert len(self.weights) == n_treated_units
assert all([len(w) == n_control_units for w in self.weights])
Loading