In [1]:
%load_ext autoreload
%autoreload complete

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm.notebook import tqdm

# Select SVG as the figure format for the inline matplotlib backend.
set_matplotlib_formats("svg")

In [27]:
from dowhy import CausalModel
from dowhy.causal_estimators.propensity_score_weighting_estimator import (
    PropensityScoreWeightingEstimator,
)
from sklearn.linear_model import LogisticRegression

from src.features.aggregation import naive_all_regions

# --- Assemble the analysis frame -------------------------------------------
# `naive_all_regions` is a project helper; only the first entry of its `x` and
# `y` collections is used here.
data = naive_all_regions()
y = data.y[0][["media_protest"]]
x_all = data.x[0]

# Adjustment set: the holiday flag plus every column whose name starts with
# "weekday".
weekday_cols = [c for c in x_all.columns if c.startswith("weekday")]
common_causes = ["is_holiday", *weekday_cols]

# Treatment column first, followed by the adjustment columns; joined to the
# outcome on the shared index.
X = x_all[["occ_FFF", *common_causes]]
df = y.join(X)

# --- Causal model and estimand ---------------------------------------------
# No explicit graph is passed; the model is specified through `common_causes`.
model = CausalModel(
    data=df,
    treatment="occ_FFF",
    outcome="media_protest",
    graph=None,
    common_causes=common_causes,
)
estimand = model.identify_effect()

# --- Propensity-score weighting estimate -----------------------------------
# A logistic regression serves as the propensity score model.
ps_model = LogisticRegression()
estimator = PropensityScoreWeightingEstimator(
    estimand, propensity_score_model=ps_model
)
estimator = estimator.fit(df)

# target_units="att" is passed through to the estimator
# (presumably the effect on the treated -- confirm against the DoWhy docs).
estimate = estimator.estimate_effect(data=df, target_units="att")
print(estimate)

  0%|          | 0/16 [00:00<?, ?it/s]

*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

## Realized estimand
b: media_protest~occ_FFF+is_holiday+weekday_Sunday+weekday_Thursday+weekday_Tuesday+weekday_Wednesday+weekday_Saturday+weekday_Monday
Target units: att

## Estimate
Mean value: 9.724385510260781

