In [1]:
from hypex.dataset import Dataset, InfoRole, TargetRole, TreatmentRole, FeatureRole
from hypex.utils.tutorial_data_creation import DataGenerator
from hypex.dataset import ABCRole, ExperimentData, TempTargetRole

In [2]:
# Synthetic experiment data with two historical (lagged) periods, produced by
# the tutorial DataGenerator. The seed is fixed (presumably so the numbers
# printed below stay the same between runs — confirm with the generator docs).
gen = DataGenerator(
    n_samples=2000,
    distributions=dict(
        X1=dict(type="normal", mean=0, std=1),
        X2=dict(type="bernoulli", p=0.5),
        y0=dict(type="normal", mean=5, std=1),
    ),
    time_correlations=dict(X1=0.2, X2=0.1, y0=0.6),
    effect_size=2.0,
    seed=42,
)

# Single pipeline: generate, discard the columns the 2-period CUPAC example
# does not need, and give the lagged outcome columns their y_lag<k> names.
df = (
    gen.generate()
    .drop(columns=["y0", "z", "U", "D", "y1"])
    .rename(columns={"y0_lag_1": "y_lag1", "y0_lag_2": "y_lag2"})
)

# Quick sanity report: remaining columns, frame size, and how strongly the
# outcome correlates between consecutive periods.
print("Generated columns:", df.columns.tolist())
print("Dataset shape:", df.shape)
print("\nCorrelations between periods:")
print(f"y_lag2 -> y_lag1: {df['y_lag2'].corr(df['y_lag1']):.3f}")
print(f"y_lag1 -> y: {df['y_lag1'].corr(df['y']):.3f}")
df.head()

Generated columns: ['d', 'X1_lag1', 'X1_lag2', 'X2_lag1', 'X2_lag2', 'y_lag1', 'y_lag2', 'y']
Dataset shape: (2000, 8)

Correlations between periods:
y_lag2 -> y_lag1: 0.793
y_lag1 -> y: 0.606


Unnamed: 0,d,X1_lag1,X1_lag2,X2_lag1,X2_lag2,y_lag1,y_lag2,y
0,0,-0.656927,-0.674978,0,0,3.895067,2.394255,4.752407
1,0,-1.00463,-0.880887,1,0,3.068464,3.769504,4.246357
2,1,-1.097898,-0.030517,1,1,4.229747,3.919514,9.546954
3,1,-0.22364,0.350105,1,0,6.413378,5.828773,5.073932
4,0,2.107403,2.170936,1,0,5.477219,5.830851,3.436856


In [3]:
# Role mapping for the 2-period multilevel CUPAC example.
# Every lagged column points at its base variable through `parent` and states
# how many periods back it sits through `lag`. Entries are produced period by
# period in the order target, X1, X2 — the same order as writing them by hand.
lagged_roles = {
    f"{base}_lag{period}": role_cls(parent=base, lag=period)
    for period in (1, 2)
    for base, role_cls in (("y", TargetRole), ("X1", FeatureRole), ("X2", FeatureRole))
}

# "d" is the treatment flag and "y" the current-period target. `cofounders`
# names the base features tied to the target (keyword spelled as the HypEx
# API expects it). Columns without an explicit role fall back to InfoRole.
data = Dataset(
    roles={
        "d": TreatmentRole(),
        "y": TargetRole(cofounders=["X1", "X2"]),
        **lagged_roles,
    },
    data=df,
    default_role=InfoRole(),
)

In [4]:
# Import the CUPAC executor and create the instance used by the next cell.
# NOTE(review): notebook convention is to keep all imports in the first cell;
# consider moving this import there.
from hypex.ml import CUPACExecutor
# cupac_models=None: presumably lets the executor fall back to its default
# model selection — TODO confirm against the CUPACExecutor documentation.
executor = CUPACExecutor(cupac_models=None)

In [5]:
# Wrap the role-annotated Dataset in an ExperimentData container and run the
# CUPAC executor on it. The recorded output under this cell shows the
# train/predict column mapping followed by a `y_cupac` column; the cell has no
# print call of its own, so that output appears to come from execute().
exp_data = ExperimentData(data)
experiment_data = executor.execute(exp_data)

{'y': {'X_train': [['X1_lag2'], ['X2_lag2'], ['y_lag2']], 'Y_train': ['y_lag1'], 'X_predict': [['X1_lag1'], ['X2_lag1'], ['y_lag1']]}}
        y_cupac
0      6.474663
1      6.571215
2     11.045539
3      4.886739
4      3.631880
...         ...
1995   6.362698
1996   5.717659
1997   5.785920
1998   6.097312
1999   5.527308

[2000 rows x 1 columns]
