This notebook shows how to use the switchback module. In particular, it shows how to create a PowerAnalysis object with a switchback splitter, using a time column and 30-minute splits.

It uses the splitter of the PowerAnalysis object to simulate the treatment assignment, and shows how the stratification of the clusters works.

Finally, it shows how to run the power analysis.

In [None]:
from cluster_experiments import PowerAnalysis
import pandas as pd
import numpy as np

# Fix NumPy's global random state so the random draws in this notebook are reproducible
SEED = 42
np.random.seed(SEED)

In [None]:
# Define a stratified switchback splitter that uses 30-minute switch windows
config = {
    "time_col": "time",
    "switch_frequency": "30min",
    "perturbator": "uniform",
    # NOTE(review): confirm "pairedttest" is a registered analysis key in the
    # installed cluster_experiments version.
    "analysis": "pairedttest",
    "splitter": "switchback_stratified",
    "cluster_cols": ["time", "city"],
    # Stratify on every dimension whose A/B balance is inspected later in the
    # notebook (city, day of week, hour of day), not on city alone.
    "strata_cols": ["city", "day_of_week", "hour_of_day"],
    "target_col": "y",
}

# Build the PowerAnalysis object described by the config
power = PowerAnalysis.from_dict(config)

In [None]:
# Build a synthetic dataset: minute-level timestamps sampled at random over one
# week, a standard-normal target, and the same rows replicated for every city.
N_MINUTES = 7 * 24 * 60  # number of minutes in 7 days (10,080)
minute_grid = pd.date_range("2021-01-01", "2021-01-08", freq="1min")
df_raw = pd.DataFrame(
    {
        # N_MINUTES timestamps drawn with replacement from 2021-01-01 00:00 to
        # 2021-01-07 23:59: sampled positions stop at N_MINUTES - 1, so the
        # grid's closing 2021-01-08 00:00 entry is never picked.
        "time": minute_grid[np.random.randint(N_MINUTES, size=N_MINUTES)],
        # Draw order (randint first, then randn) is kept so that results under
        # a fixed NumPy seed stay the same.
        "y": np.random.randn(N_MINUTES),
    }
).assign(
    # Calendar features derived from the timestamp
    day_of_week=lambda df: df.time.dt.dayofweek,
    hour_of_day=lambda df: df.time.dt.hour,
)
# One copy of the raw rows per city, tagged with a `city` column
df = pd.concat([df_raw.assign(city=city) for city in ("TGN", "NYC", "LON", "REU")])

In [None]:
# Preview the first 10 rows of the dataset
df.head(n=10)

In [None]:
# Use the splitter held by the PowerAnalysis object to assign a treatment to the data
splitter = power.splitter
treatments = splitter.assign_treatment_df(df)

In [None]:
# Count the distinct (city, treatment, time) combinations per city and treatment
# to check that every city gets a balanced A/B split (first 10 rows shown).
city_clusters = treatments[["city", "treatment", "time"]].drop_duplicates()
city_clusters.groupby(["city", "treatment"]).size().head(10)

In [None]:
# Count the distinct (city, treatment, time, hour_of_day) combinations per hour of
# day and treatment to check the A/B balance within each hour (first 10 rows shown).
hourly_clusters = treatments[
    ["city", "treatment", "time", "hour_of_day"]
].drop_duplicates()
hourly_clusters.groupby(["hour_of_day", "treatment"]).size().head(10)

In [None]:
# Count the distinct (city, treatment, time, day_of_week) combinations per day of
# week and treatment to check the A/B balance within each day (first 10 rows shown).
daily_clusters = treatments[
    ["city", "treatment", "time", "day_of_week"]
].drop_duplicates()
daily_clusters.groupby(["day_of_week", "treatment"]).size().head(10)

In [None]:
# Rows from the first 30 minutes of 2021-01-01, counted per city and treatment:
# each of LON, NYC, REU and TGN is expected to appear with a single treatment.
first_window = treatments.query("time < '2021-01-01 00:30:00'")
first_window.groupby(["city", "treatment"]).size()

In [None]:
# Run the power analysis on the dataset for an average effect of 0.01
estimated_power = power.power_analysis(df, average_effect=0.01)
estimated_power

