In [3]:
import numpy as np
import pandas as pd
from pathlib import Path

# =========================
# Configuration
# =========================

# Seed handed to NumPy's global RNG inside simulate(), so reruns give the same data.
RANDOM_SEED = 123

# --- Size of the synthetic test suite ---
N_TEST_CASES = 40          # how many distinct TestCaseId values are generated
ASSERTIONS_PER_CASE = 5    # how many AssertionId values each test case gets

# --- Runs simulated for every (TestCaseId, AssertionId) pair, per period ---
N_PREV_RUNS_PER_PAIR = 5
N_NEXT_RUNS_PER_PAIR = 5

# --- Baseline pass probability ---
# Every pair j receives its own baseline logit:
#   eta_j    ~ Normal(MU_ETA, SIGMA_ETA)
#   p_prev_j = logistic(eta_j)
MU_ETA = 0.0       # mean logit; a logit of 0 is a pass probability of 0.5
SIGMA_ETA = 1.0    # spread of the baseline logits across pairs

# --- Treatment effect ---
# Global shift on the logit scale: the next-period logit is eta_j + TAU_LOGIT_TRUE.
# A positive value means the "next" period passes more often on average.
TAU_LOGIT_TRUE = 0.5

# When enabled, each pair's shift additionally gets a Normal(0, SIGMA_TAU_PAIR)
# deviation, so the effect differs from pair to pair.
HETEROGENEOUS_EFFECTS = True
SIGMA_TAU_PAIR = 0.2

# --- Output locations ---
DATA_DIR = Path("data")
PREVIOUS_OUT = DATA_DIR.joinpath("assertions_previous.csv")
NEXT_OUT = DATA_DIR.joinpath("assertions_next.csv")


# =========================
# Helper functions
# =========================

def logistic(x: np.ndarray) -> np.ndarray:
    """Map logits to probabilities with the logistic (sigmoid) function.

    Evaluates ``1 / (1 + exp(-x))`` element-wise.

    Parameters
    ----------
    x : float or np.ndarray
        Logit value(s). Plain scalars are accepted as well as arrays; the
        simulation in this file calls it with one scalar logit at a time.

    Returns
    -------
    float or np.ndarray
        ``1 / (1 + exp(-x))`` for each element of ``x``, with the same shape
        as the input.

    Notes
    -----
    For very negative ``x`` the intermediate ``exp(-x)`` overflows to ``inf``.
    The quotient is then exactly ``0.0``, which is the correct limit, but NumPy
    would report the overflow as a ``RuntimeWarning`` (or raise if the caller
    configured ``np.seterr(over="raise")``). Overflow reporting is therefore
    silenced for this one expression; the returned values are identical to the
    unguarded formula.
    """
    # Scope the suppression to this expression so the caller's global
    # floating-point error settings are left untouched.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-x))


def simulate():
    """Generate synthetic pass/fail assertion results for two periods and save them.

    For every (TestCaseId, AssertionId) pair a baseline logit is drawn from
    Normal(MU_ETA, SIGMA_ETA). The next-period logit adds TAU_LOGIT_TRUE, plus
    a Normal(0, SIGMA_TAU_PAIR) pair-specific deviation when
    HETEROGENEOUS_EFFECTS is enabled. Each period then receives its configured
    number of Bernoulli draws per pair, using the logistic of the matching
    logit as the success probability.

    Side effects
    ------------
    * Reseeds NumPy's global random state with RANDOM_SEED.
    * Creates DATA_DIR if it does not exist.
    * Writes the previous-period rows to PREVIOUS_OUT and the next-period rows
      to NEXT_OUT as CSV files without an index column
      (columns: RunId, TestCaseId, AssertionId, IsTrue).
    * Prints the configured parameters, the mean true probabilities, and the
      first rows of the per-pair ground-truth table.

    Returns
    -------
    None
    """
    np.random.seed(RANDOM_SEED)
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Identifiers are 1-based and zero-padded to three digits.
    case_ids = ["TC{:03d}".format(k) for k in range(1, N_TEST_CASES + 1)]
    assertion_ids = ["AS{:03d}".format(k) for k in range(1, ASSERTIONS_PER_CASE + 1)]

    previous_rows, following_rows, truth_rows = [], [], []

    def record_runs(sink, n_runs, success_prob, case_id, assertion_id):
        """Append ``n_runs`` Bernoulli(success_prob) outcomes for one pair to ``sink``."""
        for _ in range(n_runs):
            outcome = np.random.binomial(1, success_prob)
            sink.append(dict(
                # RunId keeps counting across both periods: it is one more
                # than the number of rows recorded so far in either table.
                RunId=len(previous_rows) + len(following_rows) + 1,
                TestCaseId=case_id,
                AssertionId=assertion_id,
                IsTrue=outcome,
            ))

    for case_id, assertion_id in ((c, a) for c in case_ids for a in assertion_ids):
        # The order of random draws is part of the reproducible output:
        # baseline logit, optional pair deviation, previous runs, next runs.
        baseline_logit = np.random.normal(MU_ETA, SIGMA_ETA)
        effect = (
            TAU_LOGIT_TRUE + np.random.normal(0.0, SIGMA_TAU_PAIR)
            if HETEROGENEOUS_EFFECTS
            else TAU_LOGIT_TRUE
        )
        shifted_logit = baseline_logit + effect

        prob_before = logistic(baseline_logit)
        prob_after = logistic(shifted_logit)

        # Ground truth for this pair, kept for the summary printed at the end.
        truth_rows.append(dict(
            TestCaseId=case_id,
            AssertionId=assertion_id,
            eta_prev=baseline_logit,
            p_prev=prob_before,
            tau_j=effect,
            eta_next=shifted_logit,
            p_next=prob_after,
            true_delta=prob_after - prob_before,
        ))

        record_runs(previous_rows, N_PREV_RUNS_PER_PAIR, prob_before, case_id, assertion_id)
        record_runs(following_rows, N_NEXT_RUNS_PER_PAIR, prob_after, case_id, assertion_id)

    pair_df = pd.DataFrame(truth_rows)

    # One CSV per period, in the layout the downstream analysis reads.
    for rows, destination in ((previous_rows, PREVIOUS_OUT), (following_rows, NEXT_OUT)):
        pd.DataFrame(rows).to_csv(destination, index=False)

    # Averages of the true per-pair quantities.
    mean_prev, mean_next, mean_delta = (
        pair_df[column].mean() for column in ("p_prev", "p_next", "true_delta")
    )

    report = [
        "Simulation complete.",
        "Files written:",
        f"  Previous: {PREVIOUS_OUT}",
        f"  Next:     {NEXT_OUT}",
        "",
        "True underlying parameters (population-level):",
        f"  MU_ETA          = {MU_ETA:.3f}",
        f"  SIGMA_ETA       = {SIGMA_ETA:.3f}",
        f"  TAU_LOGIT_TRUE  = {TAU_LOGIT_TRUE:.3f}",
    ]
    if HETEROGENEOUS_EFFECTS:
        report.append(f"  SIGMA_TAU_PAIR  = {SIGMA_TAU_PAIR:.3f}")
    report += [
        "",
        "Derived true probabilities:",
        f"  mean(p_prev)    = {mean_prev:.4f}",
        f"  mean(p_next)    = {mean_next:.4f}",
        f"  mean(p_next - p_prev) = {mean_delta:.4f}",
        "",
        "First few per-pair true parameters:",
    ]
    print("\n".join(report))
    print(pair_df.head())

In [4]:
# Run the simulation: writes the previous/next CSVs under DATA_DIR and prints
# the ground-truth summary shown below.
simulate()

Simulation complete.
Files written:
  Previous: data/assertions_previous.csv
  Next:     data/assertions_next.csv

True underlying parameters (population-level):
  MU_ETA          = 0.000
  SIGMA_ETA       = 1.000
  TAU_LOGIT_TRUE  = 0.500
  SIGMA_TAU_PAIR  = 0.200

Derived true probabilities:
  mean(p_prev)    = 0.5370
  mean(p_next)    = 0.6338
  mean(p_next - p_prev) = 0.0968

First few per-pair true parameters:
  TestCaseId AssertionId  eta_prev    p_prev     tau_j  eta_next    p_next  \
0      TC001       AS001 -1.085631  0.252442  0.699469 -0.386162  0.404642   
1      TC001       AS002 -0.678886  0.336510  0.481058 -0.197828  0.450704   
2      TC001       AS003 -0.935834  0.281743  0.735166 -0.200668  0.450001   
3      TC001       AS004  0.927462  0.716560  0.465273  1.392735  0.801029   
4      TC001       AS005  2.392365  0.916243  0.582582  2.974948  0.951429   

   true_delta  
0    0.152200  
1    0.114194  
2    0.168258  
3    0.084468  
4    0.035186  
