# abtester

This Jupyter Notebook contains the code snippets provided at the end of each calculation method of the web app. It is intended as a sandbox to play around with the functions, understand how they work, test and compare approaches and so on. Go ahead and have fun!

## Sample size

### Proportions

#### t-test

In [1]:
# Import the libraries
import math
from statsmodels.stats.proportion import proportion_effectsize
from statsmodels.stats.power import tt_ind_solve_power

In [2]:
# Define the parameters
# Baseline conversion rate and the relative change applied to it later on.
control_proportion, sensitivity = 0.15, 0.1
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Alpha is derived later as 1 - confidence.
confidence, power = 0.95, 0.8
# Only the quotient treatment_ratio / control_ratio is used downstream.
control_ratio = treatment_ratio = 0.5

In [3]:
# Calculate the sample size
# Use a signed copy rather than flipping `sensitivity` in place: an in-place
# flip would change sign again every time this cell is re-run with
# alternative == "smaller".
signed_sensitivity = -sensitivity if alternative == "smaller" else sensitivity
# The treatment proportion is the control proportion shifted by the relative change.
treatment_proportion = control_proportion * (1 + signed_sensitivity)
effect_size = proportion_effectsize(
    treatment_proportion,
    control_proportion
)
alpha = 1 - confidence
# Size of the treatment group relative to the control group.
ratio = treatment_ratio / control_ratio
# The solver result is rounded up to a whole number of users for the control group.
control_sample = math.ceil(tt_ind_solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=ratio,
    alternative=alternative
))
treatment_sample = math.ceil(control_sample * ratio)

In [4]:
# Show the result
# One line per entry, with thousands separators on the counts.
print(
    "Sample size",
    f"Control: {control_sample:,}",
    f"Treatment: {treatment_sample:,}",
    f"Total: {(control_sample + treatment_sample):,}",
    sep="\n",
)

Sample size
Control: 9,254
Treatment: 9,254
Total: 18,508


#### Z-test

In [5]:
# Import the libraries
import math
from statsmodels.stats.proportion import proportion_effectsize
from statsmodels.stats.power import zt_ind_solve_power

In [6]:
# Define the parameters
# Baseline conversion rate and the relative change applied to it later on.
control_proportion, sensitivity = 0.15, 0.1
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Alpha is derived later as 1 - confidence.
confidence, power = 0.95, 0.8
# Only the quotient treatment_ratio / control_ratio is used downstream.
control_ratio = treatment_ratio = 0.5

In [7]:
# Calculate the sample size
# Use a signed copy rather than flipping `sensitivity` in place: an in-place
# flip would change sign again every time this cell is re-run with
# alternative == "smaller".
signed_sensitivity = -sensitivity if alternative == "smaller" else sensitivity
# The treatment proportion is the control proportion shifted by the relative change.
treatment_proportion = control_proportion * (1 + signed_sensitivity)
effect_size = proportion_effectsize(
    treatment_proportion,
    control_proportion
)
alpha = 1 - confidence
# Size of the treatment group relative to the control group.
ratio = treatment_ratio / control_ratio
# The solver result is rounded up to a whole number of users for the control group.
control_sample = math.ceil(zt_ind_solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=ratio,
    alternative=alternative
))
treatment_sample = math.ceil(control_sample * ratio)

In [8]:
# Show the result
# One line per entry, with thousands separators on the counts.
print(
    "Sample size",
    f"Control: {control_sample:,}",
    f"Treatment: {treatment_sample:,}",
    f"Total: {(control_sample + treatment_sample):,}",
    sep="\n",
)

Sample size
Control: 9,253
Treatment: 9,253
Total: 18,506


### Means

#### t-test

In [9]:
# Import the libraries
import math
import pandas as pd
from statsmodels.stats.power import tt_ind_solve_power

In [10]:
# Load the CSV file
# The following cells read its "Measurement" column.
dataset_path = "datasets/sample_size/dataset_a.csv"
df = pd.read_csv(dataset_path)

In [11]:
# Define the parameters
# Baseline statistics taken from the "Measurement" column of the loaded data.
control_mean = df["Measurement"].mean()
standard_deviation = df["Measurement"].std()
# Relative change applied to the control mean later on.
sensitivity = 0.1
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Alpha is derived later as 1 - confidence.
confidence, power = 0.95, 0.8
# Only the quotient treatment_ratio / control_ratio is used downstream.
control_ratio = treatment_ratio = 0.5

In [12]:
# Calculate the sample size
# Use a signed copy rather than flipping `sensitivity` in place: an in-place
# flip would change sign again every time this cell is re-run with
# alternative == "smaller".
signed_sensitivity = -sensitivity if alternative == "smaller" else sensitivity
# The treatment mean is the control mean shifted by the relative change.
treatment_mean = control_mean * (1 + signed_sensitivity)
difference = treatment_mean - control_mean
# Standardise the difference in means by the standard deviation.
effect_size = difference / standard_deviation
alpha = 1 - confidence
# Size of the treatment group relative to the control group.
ratio = treatment_ratio / control_ratio
# The solver result is rounded up to a whole number of users for the control group.
control_sample = math.ceil(tt_ind_solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=ratio,
    alternative=alternative
))
treatment_sample = math.ceil(control_sample * ratio)

In [13]:
# Show the result
# One line per entry, with thousands separators on the counts.
print(
    "Sample size",
    f"Control: {control_sample:,}",
    f"Treatment: {treatment_sample:,}",
    f"Total: {(control_sample + treatment_sample):,}",
    sep="\n",
)

Sample size
Control: 263
Treatment: 263
Total: 526


#### Z-test

In [14]:
# Import the libraries
import math
import pandas as pd
from statsmodels.stats.power import zt_ind_solve_power

In [15]:
# Load the CSV file
# The following cells read its "Measurement" column.
dataset_path = "datasets/sample_size/dataset_a.csv"
df = pd.read_csv(dataset_path)

In [16]:
# Define the parameters
# Baseline statistics taken from the "Measurement" column of the loaded data.
control_mean = df["Measurement"].mean()
standard_deviation = df["Measurement"].std()
# Relative change applied to the control mean later on.
sensitivity = 0.1
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Alpha is derived later as 1 - confidence.
confidence, power = 0.95, 0.8
# Only the quotient treatment_ratio / control_ratio is used downstream.
control_ratio = treatment_ratio = 0.5

In [17]:
# Calculate the sample size
# Use a signed copy rather than flipping `sensitivity` in place: an in-place
# flip would change sign again every time this cell is re-run with
# alternative == "smaller".
signed_sensitivity = -sensitivity if alternative == "smaller" else sensitivity
# The treatment mean is the control mean shifted by the relative change.
treatment_mean = control_mean * (1 + signed_sensitivity)
difference = treatment_mean - control_mean
# Standardise the difference in means by the standard deviation.
effect_size = difference / standard_deviation
alpha = 1 - confidence
# Size of the treatment group relative to the control group.
ratio = treatment_ratio / control_ratio
# The solver result is rounded up to a whole number of users for the control group.
control_sample = math.ceil(zt_ind_solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=ratio,
    alternative=alternative
))
treatment_sample = math.ceil(control_sample * ratio)

In [18]:
# Show the result
# One line per entry, with thousands separators on the counts.
print(
    "Sample size",
    f"Control: {control_sample:,}",
    f"Treatment: {treatment_sample:,}",
    f"Total: {(control_sample + treatment_sample):,}",
    sep="\n",
)

Sample size
Control: 262
Treatment: 262
Total: 524


## Statistical significance

### Proportions

#### Z-test

In [19]:
# Import the libraries
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

In [20]:
# Define the parameters
# Users exposed to each variant and how many of them converted.
control_users, treatment_users = 30000, 30000
control_conversions, treatment_conversions = 1202, 1298
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Significance threshold the p-value is compared against.
confidence = 0.95
alpha = 1 - confidence

In [21]:
# Calculate the p-value
# Treatment is listed first and control second in both arrays.
count = np.array((treatment_conversions, control_conversions))
nobs = np.array((treatment_users, control_users))
# The call returns two values; the second one is the p-value used below.
tstat, p_value = proportions_ztest(
    count=count, nobs=nobs, alternative=alternative
)

In [22]:
# Show the result
# Significance is decided on the unrounded p-value; rounding only affects
# how the value is displayed.
outcome = "is" if p_value <= alpha else "is not"
value = "< 0.0001" if round(p_value, 4) < 0.0001 else f"= {p_value:.4f}"
print(f"The difference {outcome} statistically significant, with a p-value {value}.")

The difference is statistically significant, with a p-value = 0.0498.


#### Permutation

In [23]:
# Import the libraries
import random
import numpy as np
import pandas as pd

In [24]:
# Define the parameters
# Users exposed to each variant and how many of them converted.
control_users, treatment_users = 30000, 30000
control_conversions, treatment_conversions = 1202, 1298
# Direction of the test: "two-sided", "larger" or "smaller".
alternative = "two-sided"
# Significance threshold the p-value is compared against.
confidence = 0.95
alpha = 1 - confidence
# Number of random permutations to draw.
iterations = 1000

In [25]:
# Calculate the observed difference
# Conversion rate of each group, then treatment minus control.
treatment_proportion = treatment_conversions / treatment_users
control_proportion = control_conversions / control_users
observed_diff = treatment_proportion - control_proportion

In [26]:
# Create the pool to draw the samples
# One entry per user across both groups: 0 = no conversion, 1 = conversion.
control_no_conversions = control_users - control_conversions
treatment_no_conversions = treatment_users - treatment_conversions
zeros = [0] * (control_no_conversions + treatment_no_conversions)
ones = [1] * (control_conversions + treatment_conversions)
conversion = pd.Series(zeros + ones)

In [27]:
# Declare the permutation function
def permutation(x, nC, nT):
    """Return one permuted treatment-minus-control difference in means.

    x is the pooled Series, looked up by the labels 0 .. nC + nT - 1.
    nT of those labels are drawn at random as the treatment group and the
    remaining nC labels form the control group.
    """
    total = nC + nT
    treatment_labels = set(random.sample(range(total), nT))
    control_labels = set(range(total)) - treatment_labels
    treatment_mean = x.loc[list(treatment_labels)].mean()
    control_mean = x.loc[list(control_labels)].mean()
    return treatment_mean - control_mean

In [28]:
# Execute the permutation test
# Fixed seed so that repeated runs draw the same permutations.
random.seed(0)
perm_diffs = [
    permutation(conversion, control_users, treatment_users)
    for _ in range(iterations)
]

In [29]:
# Calculate the p-value
# Share of permuted differences at least as extreme as the observed one,
# in the direction(s) selected by `alternative`.
if alternative == "smaller":
    p_value = np.mean([diff <= observed_diff for diff in perm_diffs])
elif alternative == "larger":
    p_value = np.mean([diff >= observed_diff for diff in perm_diffs])
elif alternative == "two-sided":
    p_value = np.mean([abs(diff) >= abs(observed_diff) for diff in perm_diffs])
else:
    # Fail loudly: otherwise p_value would be undefined, or would silently
    # keep the value left behind by a previously executed cell.
    raise ValueError(
        f'alternative must be "smaller", "larger" or "two-sided", got {alternative!r}'
    )

In [30]:
# Show the result
# Significance is decided on the unrounded p-value; rounding only affects
# how the value is displayed.
outcome = "is" if p_value <= alpha else "is not"
value = "< 0.0001" if round(p_value, 4) < 0.0001 else f"= {p_value:.4f}"
print(f"The difference {outcome} statistically significant, with a p-value {value}.")

The difference is not statistically significant, with a p-value = 0.0640.


### Means

#### t-test

In [31]:
# Import the libraries
import random
import numpy as np
import pandas as pd
from statsmodels.stats.weightstats import ttest_ind

In [32]:
# Load the CSV file
# The following cells read its "Group" and "Measurement" columns.
dataset_path = "datasets/statistical_significance/dataset_1.csv"
df = pd.read_csv(dataset_path)

In [33]:
# Define the parameters
# Direction of the test and the confidence level.
alternative, confidence = "two-sided", 0.95
# Significance threshold the p-value is compared against.
alpha = 1 - confidence

In [34]:
# Calculate the observed difference
# Mean measurement per group, then treatment minus control.
treatment_mean = df.loc[df["Group"] == "Treatment", "Measurement"].mean()
control_mean = df.loc[df["Group"] == "Control", "Measurement"].mean()
observed_diff = treatment_mean - control_mean

In [35]:
# Get the control and treatment measurements
# Select the "Measurement" values of the rows belonging to each group.
control_measurements = df.loc[df["Group"] == "Control", "Measurement"]
treatment_measurements = df.loc[df["Group"] == "Treatment", "Measurement"]

In [36]:
# Calculate the p-value
# Treatment is the first sample and control the second; the three returned
# values are unpacked as test statistic, p-value and degrees of freedom.
tstat, p_value, dof = ttest_ind(
    x1=treatment_measurements,
    x2=control_measurements,
    alternative=alternative,
)

In [37]:
# Show the result
# Significance is decided on the unrounded p-value; rounding only affects
# how the value is displayed.
outcome = "is" if p_value <= alpha else "is not"
value = "< 0.0001" if round(p_value, 4) < 0.0001 else f"= {p_value:.4f}"
print(f"The difference {outcome} statistically significant, with a p-value {value}.")

The difference is statistically significant, with a p-value = 0.0271.


#### Z-test

In [38]:
# Import the libraries
import random
import numpy as np
import pandas as pd
from statsmodels.stats.weightstats import ztest

In [39]:
# Load the CSV file
# The following cells read its "Group" and "Measurement" columns.
dataset_path = "datasets/statistical_significance/dataset_1.csv"
df = pd.read_csv(dataset_path)

In [40]:
# Define the parameters
# Direction of the test and the confidence level.
alternative, confidence = "two-sided", 0.95
# Significance threshold the p-value is compared against.
alpha = 1 - confidence

In [41]:
# Calculate the observed difference
# Mean measurement per group, then treatment minus control.
treatment_mean = df.loc[df["Group"] == "Treatment", "Measurement"].mean()
control_mean = df.loc[df["Group"] == "Control", "Measurement"].mean()
observed_diff = treatment_mean - control_mean

In [42]:
# Get the control and treatment measurements
# Select the "Measurement" values of the rows belonging to each group.
control_measurements = df.loc[df["Group"] == "Control", "Measurement"]
treatment_measurements = df.loc[df["Group"] == "Treatment", "Measurement"]

In [43]:
# Calculate the p-value
# Treatment is the first sample and control the second; the two returned
# values are unpacked as test statistic and p-value.
tstat, p_value = ztest(
    x1=treatment_measurements,
    x2=control_measurements,
    alternative=alternative,
)

In [44]:
# Show the result
# Significance is decided on the unrounded p-value; rounding only affects
# how the value is displayed.
outcome = "is" if p_value <= alpha else "is not"
value = "< 0.0001" if round(p_value, 4) < 0.0001 else f"= {p_value:.4f}"
print(f"The difference {outcome} statistically significant, with a p-value {value}.")

The difference is statistically significant, with a p-value = 0.0260.


#### Permutation

In [45]:
# Import the libraries
import random
import numpy as np
import pandas as pd

In [46]:
# Load the CSV file
# The following cells read its "Group" and "Measurement" columns.
dataset_path = "datasets/statistical_significance/dataset_1.csv"
df = pd.read_csv(dataset_path)

In [47]:
# Define the parameters
# Direction of the test and the confidence level.
alternative, confidence = "two-sided", 0.95
# Significance threshold the p-value is compared against.
alpha = 1 - confidence
# Number of random permutations to draw.
iterations = 1000

In [48]:
# Get the measurements and count the users
# Pooled measurements of every row, plus the number of rows in each group.
# NOTE(review): the permutation step assumes these two counts add up to the
# number of pooled measurements, i.e. no other "Group" values exist - confirm.
measurements = df["Measurement"]
control_users = len(df.loc[df["Group"] == "Control"])
treatment_users = len(df.loc[df["Group"] == "Treatment"])

In [49]:
# Calculate the observed difference
# Mean measurement per group, then treatment minus control.
treatment_mean = df.loc[df["Group"] == "Treatment", "Measurement"].mean()
control_mean = df.loc[df["Group"] == "Control", "Measurement"].mean()
observed_diff = treatment_mean - control_mean

In [50]:
# Declare the permutation function
def permutation(x, nC, nT):
    """Return one permuted treatment-minus-control difference in means.

    x is the pooled Series holding nC + nT measurements. nC positions are
    drawn at random as the control group and the remaining nT positions
    form the treatment group, so each permuted group keeps its own size.
    """
    n = nC + nT
    # Draw the control group with its own size (nC, not nT); otherwise the
    # two group sizes are swapped whenever the groups are unbalanced.
    idx_C = set(random.sample(range(n), nC))
    idx_T = set(range(n)) - idx_C
    # The drawn numbers are positions 0 .. n - 1, so index positionally
    # instead of relying on the Series labels being 0 .. n - 1.
    return x.iloc[list(idx_T)].mean() - x.iloc[list(idx_C)].mean()

In [51]:
# Execute the permutation test
# Fixed seed so that repeated runs draw the same permutations.
random.seed(0)
perm_diffs = [
    permutation(measurements, control_users, treatment_users)
    for _ in range(iterations)
]

In [52]:
# Calculate the p-value
# Share of permuted differences at least as extreme as the observed one,
# in the direction(s) selected by `alternative`.
if alternative == "smaller":
    p_value = np.mean([diff <= observed_diff for diff in perm_diffs])
elif alternative == "larger":
    p_value = np.mean([diff >= observed_diff for diff in perm_diffs])
elif alternative == "two-sided":
    p_value = np.mean([abs(diff) >= abs(observed_diff) for diff in perm_diffs])
else:
    # Fail loudly: otherwise p_value would be undefined, or would silently
    # keep the value left behind by a previously executed cell.
    raise ValueError(
        f'alternative must be "smaller", "larger" or "two-sided", got {alternative!r}'
    )

In [53]:
# Show the result
# Significance is decided on the unrounded p-value; rounding only affects
# how the value is displayed.
outcome = "is" if p_value <= alpha else "is not"
value = "< 0.0001" if round(p_value, 4) < 0.0001 else f"= {p_value:.4f}"
print(f"The difference {outcome} statistically significant, with a p-value {value}.")

The difference is statistically significant, with a p-value = 0.0290.
