# R&D CUPAC CUPED Wald estimator

## Генерация данных

In [1]:
import pandas as pd
import numpy as np
from data_gen import DataGenerator

Нормальные X1 и y, бинарный (Bernoulli) X2, слабые корреляции

In [2]:
# Scenario 1: normal X1 and y0, Bernoulli X2, time_correlations in the
# 0.1-0.3 range, effect_size 2.0. The seed is fixed at 7.
gen1 = DataGenerator(
    n_samples=2000,
    distributions=dict(
        X1=dict(type="normal", mean=0, std=1),
        X2=dict(type="bernoulli", p=0.5),
        y0=dict(type="normal", mean=5, std=1),
    ),
    time_correlations=dict(X1=0.2, X2=0.1, y0=0.3),
    effect_size=2.0,
    seed=7,
)
df1 = gen1.generate()

Высокая автокорреляция и гамма-распределение для Y

In [3]:
# Scenario 2: gamma-distributed y0 (shape 2, scale 3) with high
# time_correlations (up to 0.9), effect_size 3.0. The seed is fixed at 7.
gen2 = DataGenerator(
    n_samples=2000,
    distributions=dict(
        X1=dict(type="normal", mean=1, std=2),
        X2=dict(type="bernoulli", p=0.3),
        y0=dict(type="gamma", shape=2.0, scale=3.0),
    ),
    time_correlations=dict(X1=0.8, X2=0.4, y0=0.9),
    effect_size=3.0,
    seed=7,
)
df2 = gen2.generate()

Сдвинутое распределение X1 (среднее 10, стандартное отклонение 5) и слабый эффект

In [4]:
# Scenario 3: X1 is normal with mean 10 and std 5, y0 is standard normal,
# effect_size is small (0.5). 3000 samples, seed fixed at 7.
gen3 = DataGenerator(
    n_samples=3000,
    distributions=dict(
        X1=dict(type="normal", mean=10, std=5),
        X2=dict(type="bernoulli", p=0.7),
        y0=dict(type="normal", mean=0, std=1),
    ),
    time_correlations=dict(X1=0.5, X2=0.4, y0=0.6),
    effect_size=0.5,
    seed=7,
)
df3 = gen3.generate()

Сильный эффект и Bernoulli-переменные

In [5]:
# Scenario 4: both covariates are Bernoulli, y0 is normal (mean 3, std 2),
# effect_size is large (7.0). 1500 samples, seed fixed at 7.
gen4 = DataGenerator(
    n_samples=1500,
    distributions=dict(
        X1=dict(type="bernoulli", p=0.2),
        X2=dict(type="bernoulli", p=0.6),
        y0=dict(type="normal", mean=3, std=2),
    ),
    time_correlations=dict(X1=0.2, X2=0.5, y0=0.4),
    effect_size=7.0,
    seed=7,
)
df4 = gen4.generate()

Гамма Y, нормальный X1, умеренный эффект

In [6]:
# Scenario 5: gamma-distributed y0 (shape 5, scale 1), normal X1 (std 3),
# effect_size 4.0. 2500 samples, seed fixed at 7.
gen5 = DataGenerator(
    n_samples=2500,
    distributions=dict(
        X1=dict(type="normal", mean=0, std=3),
        X2=dict(type="bernoulli", p=0.5),
        y0=dict(type="gamma", shape=5.0, scale=1.0),
    ),
    time_correlations=dict(X1=0.3, X2=0.2, y0=0.85),
    effect_size=4.0,
    seed=7,
)
df5 = gen5.generate()

In [7]:
# Strip the columns 'y0', 'z', 'U', 'D' and 'y1' from every generated frame;
# the experiment wrappers below only receive what is left.
dfs = [
    frame.drop(['y0', 'z', 'U', 'D', 'y1'], axis="columns")
    for frame in (df1, df2, df3, df4, df5)
]

In [8]:
# Preview the first prepared frame (rendered as the cell output).
dfs[0]

Unnamed: 0,X1,X1_lag,X2,X2_lag,y0_lag_1,y0_lag_2,d,y
0,-1.014791,-1.604160,0,0,4.058503,4.800092,1,7.127424
1,-0.283158,0.232314,1,1,5.436999,6.863391,1,4.824422
2,0.609791,0.612404,1,0,3.760400,5.603602,1,2.880424
3,1.110475,-1.109095,0,1,4.842997,4.502296,1,5.379159
4,-1.168063,-0.408486,0,0,5.898899,6.235804,0,4.828901
...,...,...,...,...,...,...,...,...
1995,-0.827531,0.758988,1,1,4.732813,3.751596,0,4.014402
1996,-2.404596,-0.981092,0,0,6.181040,4.124538,1,8.708952
1997,-0.094702,0.415405,1,0,3.955750,5.582502,0,4.909073
1998,-1.389898,-0.108940,0,1,4.534923,3.228373,0,2.082938


## Метрики сравнения реализаций CUPAC и CUPED

| **Критерий**                      | **Метрика**                                    |
|----------------------------------|-----------------------------------------------------|
| **ATE**                           | Оценки должны совпадать в пределах статистической погрешности у всех корректных реализаций |
| **Standard Error (SE)**           | CUPAC должен её уменьшать                           |
| **Доверительный интервал (CI)**   | Чем уже — тем лучше                                 |
| **p-value**                       | CUPAC должен снижать p-value при одинаковом эффекте |
| **Время выполнения**              | Особенно важно при больших объёмах                  |

In [9]:
# Shared accumulator: every experiment wrapper appends one summary row per
# dataset through its save_result() method.
GLOBAL_RESULTS = pd.DataFrame(
    columns=[
        # Which wrapper produced the row and for which dataset.
        "experiment",
        "df",
        # Group means and their difference (the effect estimate).
        "control mean",
        "test mean",
        "difference",
        # Confidence interval bounds, interval width, and p-value.
        "ate_ci_lower",
        "ate_ci_upper",
        "ate_ci_delt",
        "p_value",
    ]
)

## Модели

### HypEx

In [10]:
from hypex import ABTest
from hypex.dataset import Dataset, InfoRole, TargetRole, TreatmentRole

class HypExExperiment:
    """Baseline experiment: run HypEx ``ABTest`` on the untransformed target.

    ``execute`` stores the raw HypEx result on ``self.result``;
    ``save_result`` turns it into one summary row and appends that row to the
    module-level ``GLOBAL_RESULTS`` frame. Subclasses override ``execute`` to
    transform the target first and reuse ``save_result`` unchanged.
    """

    # Two-sided 95% critical value of the standard normal distribution.
    # One shared constant is used for BOTH bounds so that the interval is
    # symmetric around the point estimate.
    Z_CRIT_95 = 1.959963984540054

    def __init__(self):
        self.name = 'HypEx'
        # 1-based index of the next dataset to be saved; used for the "df<N>" label.
        self.counter = 1
        self.result = None

    def execute(self, df: pd.DataFrame):
        """Run the A/B test on ``df``.

        ``d`` is registered as the treatment column and ``y`` as the target;
        every other column gets the default ``InfoRole``.
        """
        data = Dataset(
            roles={
                "d": TreatmentRole(),
                "y": TargetRole(),
            },
            data=df,
            default_role=InfoRole()
        )

        test = ABTest()
        self.result = test.execute(data)

    def save_result(self):
        """Append a summary row for the latest ``execute`` call to ``GLOBAL_RESULTS``.

        Does nothing when ``execute`` has not been called yet. Otherwise the
        row is labelled ``df<counter>`` and the counter is advanced by one.
        """
        global GLOBAL_RESULTS
        if self.result is None:
            return

        row = self.result.resume.data.iloc[0]
        # NOTE(review): reads a private HypEx attribute and a hard-coded table
        # key for target "y"; confirm the key format against the HypEx version in use.
        t_stat = self.result._experiment_data.analysis_tables['TTest┴┴y'].data['statistic'].iloc[0]
        # The standard error is recovered from the reported difference and t-statistic.
        std_err = abs(row["difference"] / t_stat)
        half_width = self.Z_CRIT_95 * std_err
        ci_lower = row["difference"] - half_width
        ci_upper = row["difference"] + half_width
        summary = {
            "experiment": self.name,
            "df": f"df{self.counter}",
            "control mean": row["control mean"],
            "test mean": row["test mean"],
            "difference": row["difference"],
            "ate_ci_lower": ci_lower,
            "ate_ci_upper": ci_upper,
            "p_value": row["TTest p-value"]
        }

        GLOBAL_RESULTS = pd.concat([GLOBAL_RESULTS, pd.DataFrame([summary])], ignore_index=True)
        self.counter += 1
        

### AutoCUPAC + HypEx

In [11]:
from autocupac import CUPACTransformer
from hypex import ABTest
from hypex.dataset import Dataset, InfoRole, TargetRole, TreatmentRole

class CupacHypExExperiment(HypExExperiment):
    """HypEx ``ABTest`` run on a target adjusted by ``CUPACTransformer``.

    Only ``execute`` differs from the base class; ``save_result`` is inherited.
    """

    def __init__(self):
        # The base initialiser sets counter=1 and result=None; only the label differs.
        super().__init__()
        self.name = 'HypExCupac'

    def execute(self, df: pd.DataFrame):
        """Fit the transformer on ``df``, swap ``y`` for ``y_cupac``, run the test."""
        cupac = CUPACTransformer(target_col='y')
        cupac.fit(df)

        # Replace the raw target with the adjusted one under the same name,
        # so the roles below are identical to the baseline experiment.
        adjusted = (
            cupac.transform(df)
            .drop(columns=['y'])
            .rename(columns={'y_cupac': 'y'})
        )

        roles = {
            "d": TreatmentRole(),
            "y": TargetRole(),
        }
        dataset = Dataset(roles=roles, data=adjusted, default_role=InfoRole())

        self.result = ABTest().execute(dataset)

### ambrosia + HypEx

In [12]:
from ambrosia.preprocessing import MultiCuped
from hypex import ABTest
from hypex.dataset import Dataset, InfoRole, TargetRole, TreatmentRole

class AmbrosiaCupacExperiment(HypExExperiment):
    """HypEx ``ABTest`` run on a target adjusted by ambrosia's ``MultiCuped``.

    Only ``execute`` differs from the base class; ``save_result`` is inherited.
    """

    def __init__(self):
        # The base initialiser sets counter=1 and result=None; only the label differs.
        super().__init__()
        self.name = 'AmbrosiaCupac'

    def execute(self, df: pd.DataFrame):
        """Fit ``MultiCuped`` on the lagged columns, swap in the adjusted target, run the test."""
        covariate_columns = ["X1_lag", "X2_lag", "y0_lag_1", "y0_lag_2"]

        adjuster = MultiCuped(verbose=False)
        adjuster.fit(df, 'y', covariate_columns)

        # Replace the raw target with the transformed one under the same name,
        # so the roles below are identical to the baseline experiment.
        adjusted = (
            adjuster.transform(df)
            .drop(columns=['y'])
            .rename(columns={'y_transformed': 'y'})
        )

        roles = {
            "d": TreatmentRole(),
            "y": TargetRole(),
        }
        dataset = Dataset(roles=roles, data=adjusted, default_role=InfoRole())

        self.result = ABTest().execute(dataset)

### cluster_experiments

In [13]:
import numpy as np
from cluster_experiments import (
    AnalysisPlan, SimpleMetric, Variant,
    HypothesisTest, TargetAggregation
)

class CupacClusterExperiment:
    """CUPAC analysis through ``cluster_experiments``' ``AnalysisPlan``.

    ``execute`` stores the analysis output on ``self.result_df``;
    ``save_result`` appends one summary row to ``GLOBAL_RESULTS``.
    """

    def __init__(self):
        self.name = 'Cluster'
        # 1-based index of the next dataset to be saved; used for the "df<N>" label.
        self.counter = 1

    def execute(self, df: pd.DataFrame):
        """Run a clustered-OLS hypothesis test with a CUPAC covariate on ``df``."""
        # Give every row its own id, and expose the treatment flag as a
        # labelled "variant" column.
        frame = df.assign(user_id=np.arange(len(df))).rename(columns={"d": "variant"})
        frame["variant"] = frame["variant"].replace({
            0: "control",
            1: "treatment"
        })

        # Pre-experiment data in long form: both lag columns are stacked into
        # a single "y" column keyed by user_id.
        pre_frame = (
            frame[["user_id", "y0_lag_1", "y0_lag_2"]]
            .melt(
                id_vars=["user_id"],
                value_vars=["y0_lag_1", "y0_lag_2"],
                value_name="y"
            )
            .drop(columns=["variable"])
        )

        # NOTE(review): presumably aggregates the pre-period "y" per user_id and
        # exposes it as the "estimate_y" covariate used below - confirm against
        # the cluster_experiments documentation.
        aggregation_model = TargetAggregation(
            agg_col="user_id",
            target_col="y"
        )

        analysis_config = {
            "cluster_cols": ["user_id"],
            "covariates": ["X1", "X1_lag","X2", "X2_lag","estimate_y"],
        }
        cupac_config = {
            "cupac_model": aggregation_model,
            "target_col": "y",
        }
        ols_test = HypothesisTest(
            metric=SimpleMetric(alias="AOV", name="y"),
            analysis_type="clustered_ols",
            analysis_config=analysis_config,
            cupac_config=cupac_config,
        )

        variants = [
            Variant("control", is_control=True),
            Variant("treatment", is_control=False),
        ]
        plan = AnalysisPlan(
            tests=[ols_test],
            variants=variants,
            variant_col="variant",
        )

        self.result_df = plan.analyze(frame, pre_frame).to_dataframe()

    def save_result(self):
        """Append a summary row for the latest ``execute`` call to ``GLOBAL_RESULTS``.

        Does nothing when ``execute`` has not been called yet. Otherwise the
        row is labelled ``df<counter>`` and the counter is advanced by one.
        """
        global GLOBAL_RESULTS
        if not hasattr(self, "result_df"):
            return

        first = self.result_df.iloc[0]
        # Output column -> column of the analysis result it is copied from.
        source_columns = {
            "control mean": "control_variant_mean",
            "test mean": "treatment_variant_mean",
            "difference": "ate",
            "ate_ci_lower": "ate_ci_lower",
            "ate_ci_upper": "ate_ci_upper",
            "p_value": "p_value",
        }
        summary = {"experiment": self.name, "df": f"df{self.counter}"}
        for target, source in source_columns.items():
            summary[target] = first[source]

        GLOBAL_RESULTS = pd.concat([GLOBAL_RESULTS, pd.DataFrame([summary])], ignore_index=True)
        self.counter += 1

### tea-tasting

In [14]:
import numpy as np
import tea_tasting as tt

class TeaTastingExperiment:
    """Mean comparison through ``tea_tasting`` with ``y0_lag_1`` passed to ``tt.Mean``.

    ``execute`` stores the analysis output (as a pandas frame) on
    ``self.result``; ``save_result`` appends one summary row to
    ``GLOBAL_RESULTS``.
    """

    def __init__(self):
        self.name = 'TeaTastingCuped'
        # 1-based index of the next dataset to be saved; used for the "df<N>" label.
        self.counter = 1
        self.result = None

    def execute(self, df: pd.DataFrame):
        """Analyze ``df`` with a single ``target`` metric and keep the pandas output."""
        # Give every row its own id, and expose the treatment flag under the
        # column name "variant".
        frame = df.assign(user_id=np.arange(len(df))).rename(columns={"d": "variant"})

        # NOTE(review): the second positional argument of tt.Mean is presumably
        # the CUPED covariate - confirm against the tea-tasting documentation.
        target_metric = tt.Mean("y", "y0_lag_1")
        experiment = tt.Experiment(target=target_metric)

        self.result = experiment.analyze(frame).to_pandas()

    def save_result(self):
        """Append a summary row for the latest ``execute`` call to ``GLOBAL_RESULTS``.

        Does nothing when ``execute`` has not been called yet. Otherwise the
        row is labelled ``df<counter>`` and the counter is advanced by one.
        """
        global GLOBAL_RESULTS
        if self.result is None:
            return

        first = self.result.iloc[0]
        # Output column -> column of the analysis result it is copied from.
        source_columns = {
            "control mean": "control",
            "test mean": "treatment",
            "difference": "effect_size",
            "ate_ci_lower": "effect_size_ci_lower",
            "ate_ci_upper": "effect_size_ci_upper",
            "p_value": "pvalue",
        }
        summary = {"experiment": self.name, "df": f"df{self.counter}"}
        for target, source in source_columns.items():
            summary[target] = first[source]

        GLOBAL_RESULTS = pd.concat([GLOBAL_RESULTS, pd.DataFrame([summary])], ignore_index=True)
        self.counter += 1

## RESULTS

In [15]:
# One fresh instance per implementation under comparison; each instance keeps
# its own dataset counter.
experiments = [
    HypExExperiment(),
    CupacHypExExperiment(),
    AmbrosiaCupacExperiment(),
    CupacClusterExperiment(),
    TeaTastingExperiment(),
]

In [16]:
# Run every implementation on every dataset. save_result() must follow each
# execute() immediately: the "df<N>" label comes from the instance's own
# counter, which advances once per saved result, so labels match the order of
# `dfs` only when each result is saved before the next dataset is executed.
for exp in experiments:
    for df in dfs:
        exp.execute(df)
        exp.save_result()

In [17]:
# Interval width: upper confidence bound minus lower confidence bound.
GLOBAL_RESULTS['ate_ci_delt'] = GLOBAL_RESULTS['ate_ci_upper'].sub(GLOBAL_RESULTS['ate_ci_lower'])

In [20]:
# To inspect a single dataset instead, filter the table, e.g.:
# GLOBAL_RESULTS[GLOBAL_RESULTS['df'] == 'df2']
# Display the full accumulated comparison table.
GLOBAL_RESULTS

Unnamed: 0,experiment,df,control mean,test mean,difference,ate_ci_lower,ate_ci_upper,ate_ci_delt,p_value
0,HypEx,df1,5.009951,7.40209,2.392139,2.233233,2.551047,0.317814,3.9913320000000004e-159
1,HypEx,df2,5.836422,9.577593,3.741171,3.336995,4.145354,0.808358,3.277947e-68
2,HypEx,df3,0.018475,1.082572,1.064097,0.935144,1.193053,0.257909,1.787055e-56
3,HypEx,df4,3.176386,10.552917,7.376531,7.112097,7.64097,0.528873,0.0
4,HypEx,df5,4.990785,9.748246,4.757462,4.542034,4.972893,0.430859,6.883546e-306
5,HypExCupac,df1,5.00619,7.409357,2.403167,2.247281,2.559056,0.311774,1.6825620000000002e-165
6,HypExCupac,df2,5.843405,9.565205,3.7218,3.321936,4.121671,0.799735,6.779485000000001e-69
7,HypExCupac,df3,0.009208,1.101272,1.092064,0.9719,1.21223,0.24033,1.52211e-67
8,HypExCupac,df4,3.15407,10.596494,7.442423,7.194486,7.690365,0.495878,0.0
9,HypExCupac,df5,5.005733,9.719487,4.713754,4.502563,4.924949,0.422386,7.274056e-311
