In [None]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd

In [None]:
# Build a synthetic A/B-test dataset with 50,000 users.
# The legacy global NumPy seed is fixed first so that every draw below is
# reproducible; the order of the random calls determines the generated values,
# so do not reorder the statements in this cell.
np.random.seed(42)

# Total number of simulated users (one row per user)
num_rows = 50000

# 1. Experiment arm: each user is assigned to "Test" or "Control" with equal probability
df = pd.DataFrame(
    {"Test_Eligibility": np.random.choice(["Test", "Control"], size=num_rows, p=[0.5, 0.5])}
)

# 2-7. Conversion funnel. Every stage is a 0/1 flag. A stage with a parent is the
# parent flag multiplied by a fresh Bernoulli draw, so a user can only pass a
# stage after passing the previous one.
funnel_stages = [
    # (column name, parent column, pass rate given the parent stage)
    ("Users_Visit_Site", None, 0.8),                        # 2. 80% of users visit the site
    ("Users_Visit_Homepage", "Users_Visit_Site", 0.9),      # 3. 90% of site visitors start on the homepage
    ("Delivery_Eligible", "Users_Visit_Homepage", 0.7),     # 4. 70% of homepage visitors are delivery eligible
    ("Users_View_Banner", "Delivery_Eligible", 0.8),        # 5. 80% of eligible users view the banner
    ("Users_Click_Banner", "Users_View_Banner", 0.4),       # 6. 40% of viewers click the banner
    ("Users_redeemed", "Users_Click_Banner", 0.3),          # 7. 30% of clickers redeem
]
for stage_col, parent_col, pass_rate in funnel_stages:
    stage_draw = np.random.binomial(1, pass_rate, size=len(df))
    df[stage_col] = stage_draw if parent_col is None else df[parent_col] * stage_draw

# 8. Account tenure in whole days, 0 to 180 inclusive (randint's upper bound is exclusive)
df["account_tenure"] = np.random.randint(0, 181, num_rows)

# 9. Platform on which the account was created ("App" or "Web", equally likely)
df["platform"] = np.random.choice(["App", "Web"], size=num_rows, p=[0.5, 0.5])

# 10. Age in whole years, 18 to 70 inclusive
df["age"] = np.random.randint(18, 71, num_rows)

# 11. Income, drawn from five discrete levels with the given probabilities
income_levels = [25000, 50000, 75000, 100000, 125000]
income_weights = [0.2, 0.3, 0.3, 0.15, 0.05]
df["income"] = np.random.choice(income_levels, size=num_rows, p=income_weights)
df.columns

In [None]:
# Introduce heterogeneity into the data for feeding into meta-learners.
# Click and conversion flags are re-drawn for three families of user segments:
#   1. Age between 25 and 40 (inclusive)
#   2. Lower and medium income (income <= 70,000)
#   3. Recent users: joined within 2 weeks (strongest), or within 2 months
# Segments are applied in the order listed. A user who belongs to several
# segments ends up with the draw of the LAST segment applied to them.
#
# NOTE(review): 'Users_Click_Banner' is overwritten for every user in a segment
# regardless of 'Users_View_Banner', and 'Users_redeemed' is not recomputed, so
# after this cell a user can have a click without a view, or a redemption
# without a click. Confirm this is intended before relying on funnel ordering.

# Without this, the first `.loc` assignment below would create 'Converted' with
# NaN for every user outside all segments, turning the binary flag into a float
# column with missing values. An existing column is left as it is.
if 'Converted' not in df.columns:
    df['Converted'] = 0


def _redraw_segment_outcomes(segment_mask, click_probs, conversion_probs):
    """Re-draw click and conversion flags for the users selected by ``segment_mask``.

    Parameters
    ----------
    segment_mask : boolean Series aligned with ``df``
        True for the users whose flags are replaced.
    click_probs : list of two floats
        Probabilities of the outcomes ``[1, 0]`` for 'Users_Click_Banner'.
    conversion_probs : list of two floats
        Probabilities of the outcomes ``[1, 0]`` for 'Converted'.

    The click draw is taken before the conversion draw, so the global random
    stream is consumed in a fixed order.
    """
    segment_size = segment_mask.sum()
    df.loc[segment_mask, 'Users_Click_Banner'] = np.random.choice(
        [1, 0], size=segment_size, p=click_probs
    )
    df.loc[segment_mask, 'Converted'] = np.random.choice(
        [1, 0], size=segment_size, p=conversion_probs
    )


# Click and conversion rates for age between 25 and 40
age_condition = (df['age'] >= 25) & (df['age'] <= 40)
_redraw_segment_outcomes(age_condition, [0.3, 0.7], [0.2, 0.8])

# Click and conversion rates for lower and medium income
income_condition = df['income'] <= 70000  # Assuming medium income threshold is 70,000
_redraw_segment_outcomes(income_condition, [0.25, 0.75], [0.15, 0.85])

# Click and conversion rates for recent users
tenure_condition_2_months = df['account_tenure'] <= 60
tenure_condition_2_weeks = df['account_tenure'] <= 14

# Users who joined within 2 weeks get the highest rates
_redraw_segment_outcomes(tenure_condition_2_weeks, [0.4, 0.6], [0.3, 0.7])

# Users who joined within 2 months, but not within the last 2 weeks, get moderate rates
tenure_condition_2_to_8_weeks = tenure_condition_2_months & ~tenure_condition_2_weeks
_redraw_segment_outcomes(tenure_condition_2_to_8_weeks, [0.3, 0.7], [0.2, 0.8])

print(df.groupby("Test_Eligibility")[["Users_View_Banner", "Users_Click_Banner", "Users_redeemed"]].sum())

In [None]:
# The banner is only available to Test users, so Control users should show
# almost no banner views, clicks, or redemptions. All Control banner activity is
# reset to 0, and a small random handful of Control users keeps non-zero values.
banner_columns = ["Users_View_Banner", "Users_Click_Banner", "Users_redeemed"]
n_active_control_users = 10  # number of Control users allowed to keep banner activity

is_control = df["Test_Eligibility"] == "Control"
control_indices = df[is_control].index

# Pick the Control users (without replacement) who keep non-zero values
selected_indices = np.random.choice(control_indices, size=n_active_control_users, replace=False)

# Set banner views, clicks, and redemptions for all Control users to 0 first
df.loc[is_control, banner_columns] = 0

# Every selected user viewed the banner (binary flag)
df.loc[selected_indices, "Users_View_Banner"] = 1

# Clicks are a fair coin flip per selected user. Redemptions use a second fair
# coin flip but are only kept for users who clicked, consistent with the funnel
# rule that a redemption requires a click. Both draws are always taken, so the
# global random stream advances by the same amount regardless of the outcome.
selected_clicks = np.random.choice([0, 1], size=n_active_control_users, p=[0.5, 0.5])
selected_redeem_draws = np.random.choice([0, 1], size=n_active_control_users, p=[0.5, 0.5])
df.loc[selected_indices, "Users_Click_Banner"] = selected_clicks
df.loc[selected_indices, "Users_redeemed"] = selected_clicks * selected_redeem_draws

# Print summary
print(df.groupby("Test_Eligibility")[banner_columns].sum())

In [None]:
# Re-index conversion and order volumes so that Test users show a clear lift
# over Control users. The 'Converted' column is (re)assigned for every row here,
# replacing any values it held before this cell.
is_test_arm = df["Test_Eligibility"] == "Test"

# Conversion probability per arm: Control baseline, Test 3 percentage points higher
conversion_rate_control = 0.10
conversion_rate_test = conversion_rate_control + 0.03

# One Bernoulli draw per user for each arm (Test first, then Control); each user
# keeps the draw that matches their own arm.
converted_if_test = np.random.binomial(1, conversion_rate_test, num_rows)
converted_if_control = np.random.binomial(1, conversion_rate_control, num_rows)
df["Converted"] = np.where(is_test_arm, converted_if_test, converted_if_control)

has_converted = df["Converted"] == 1

# Order rates per arm. Multiplying a rate by 10 gives the Poisson mean, so the
# expected orders per converted user are 5.0 (Control) and 5.22 (Test).
# Poisson draws are not capped, so individual counts can exceed 10.
order_rate_control = 0.50
order_rate_test = order_rate_control + 0.022

orders_if_test = np.random.poisson(order_rate_test * 10, num_rows)
orders_if_control = np.random.poisson(order_rate_control * 10, num_rows)
# Users who did not convert have 0 orders
df["Orders"] = np.where(
    has_converted,
    np.where(is_test_arm, orders_if_test, orders_if_control),
    0,
)

# Delivery-order rates per arm, scaled the same way: Poisson mean 3.0 (Control)
# and 3.4 (Test) per converted user, again without an upper cap.
delivery_rate_control = 0.30
delivery_rate_test = delivery_rate_control + 0.04

delivery_if_test = np.random.poisson(delivery_rate_test * 10, num_rows)
delivery_if_control = np.random.poisson(delivery_rate_control * 10, num_rows)
# Users who did not convert have 0 delivery orders
df["Delivery_Orders"] = np.where(
    has_converted,
    np.where(is_test_arm, delivery_if_test, delivery_if_control),
    0,
)
(df.head(2), df.columns)