In [2]:
import math
import numpy as np
import scipy.stats
import random

Enter the data by hand

In [3]:
# Two samples entered by hand: X holds 8 observations, Y holds 10.
X = [0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217]

Y = [
    0.209, 0.205, 0.196, 0.210, 0.202,
    0.207, 0.224, 0.223, 0.220, 0.201,
]

### Wald test

In [83]:
# Two-sample Wald test for a difference of means between X and Y, with a
# normal-approximation p-value and confidence interval.

# Number of data points
m = len(X)
n = len(Y)

# Sample means
sample_mean_X = np.mean(X)
sample_mean_Y = np.mean(Y)

# Difference of means estimate
estimate = sample_mean_X - sample_mean_Y

# Plug-in variance estimates. np.var defaults to ddof=0, i.e. it divides
# by the number of observations rather than by one less.
pop_var_X = np.var(X)
pop_var_Y = np.var(Y)

# Estimated standard error of the difference of means
se = np.sqrt(pop_var_X/m + pop_var_Y/n)

# Wald test statistic: the estimate measured in standard errors
W = estimate/se

# Two-sided p-value under the standard normal distribution
pvalue = 2*scipy.stats.norm.cdf(-abs(W))

# Confidence interval at level 1 - alpha
alpha = 0.05
z = scipy.stats.norm.isf(alpha/2)
lower_bound = estimate - se*z
upper_bound = estimate + se*z

# Report out. The confidence level shown is computed from alpha so the
# label cannot drift out of sync with the interval if alpha is changed.
print(
    f"The Wald statistic is {W:.2f}\n"
    f"The p-value is {pvalue:.6f}.\n\n"
    f"A {100*(1 - alpha):g}% confidence interval for the\n"
    f"difference of means is ({lower_bound:.2f}, {upper_bound:.2f})."
)

The Wald statistic is 3.94
The p-value is 0.000080.

A 95% confidence interval for the
difference of means is (0.01, 0.03).


### Permutation test

In [84]:
def sample_mean_difference(data):
    """
    Absolute difference between the means of two samples.

    Expects argument in the form data=(X, Y) and
    returns |mean(X) - mean(Y)|.
    """
    first_sample, second_sample = data
    gap = np.mean(first_sample) - np.mean(second_sample)
    return abs(gap)

def permutations(mylist, num):
    """
    Generate num-many distinct random permutations of mylist.

    Two permutations count as the same when they are equal element by
    element, so repeated values in mylist reduce the number of distinct
    permutations below factorial(len(mylist)).

    Returns a list of num lists, each a rearrangement of the elements
    of mylist, in the order they were drawn.

    Raises ValueError if num exceeds the number of distinct permutations.
    """

    items = list(mylist)
    size = len(items)

    # Label every element with the index of the first element equal to it.
    # Equal elements share a label, so a tuple of labels identifies a
    # permutation by value and is hashable even when the elements are not.
    labels = [items.index(item) for item in items]

    # Number of distinct permutations of a multiset:
    # n! / (k1! * k2! * ...), the k's being the multiplicities.
    # Every intermediate quotient is an exact integer.
    distinct_total = math.factorial(size)
    for label in set(labels):
        distinct_total //= math.factorial(labels.count(label))

    # Check that we are not asking for more distinct permutations than
    # there are; otherwise the rejection loop below would never finish.
    if num > distinct_total:
        raise ValueError(
            "num cannot be larger than the number of distinct "
            f"permutations of mylist ({distinct_total})"
        )

    permutation_list = []
    seen_signatures = set()
    positions = range(size)

    while len(permutation_list) < num:

        # Produce a permutation of mylist by shuffling its positions
        order = random.sample(positions, size)
        signature = tuple(labels[i] for i in order)

        # Set membership keeps the duplicate check constant-time instead
        # of scanning every permutation accepted so far.
        if signature not in seen_signatures:
            seen_signatures.add(signature)
            permutation_list.append([items[i] for i in order])

    return permutation_list

def data_permutations(data, num):
    """
    Given data=(X, Y) return a list of
    num-many permutations of the data,
    treating the Xi's and the Yj's
    interchangeably. Each permutation
    returned is in the form (X, Y) again:
    the first len(X) pooled values and
    then the remaining ones.

    Any ValueError raised by permutations
    (num larger than the number of
    permutations it can supply) propagates
    to the caller.
    """

    X, Y = data

    # Pool the two samples. Converting to lists first guarantees that
    # `+` concatenates; for array-like inputs it could otherwise add
    # element-wise or fail on unequal lengths.
    combined_data = list(X) + list(Y)
    raw_permutation_list = permutations(combined_data, num)

    # For each permutation in the list,
    # split it into two pieces
    # according to how many Xi's and Yj's
    # were present in the original data
    m = len(X)
    return [
        (permutation[:m], permutation[m:])
        for permutation in raw_permutation_list
    ]

def permutation_statistic(data, builder_statistic, num):
    """
    Approximate the permutation test statistic
    from num-many permutations of the data.

    Expects argument in the form data=(X, Y).
    builder_statistic should be a function
    taking as argument data=(X, Y); it is
    evaluated once on the original data and
    once on every permutation.

    Returns the fraction of permutations whose
    builder_statistic value is strictly greater
    than the value on the original data.
    """

    # Value of the statistic on the data as observed
    observed_value = builder_statistic(data)

    # One flag per permutation: does it beat the observed value?
    shuffled_datasets = data_permutations(data, num)
    exceeds_observed = [
        builder_statistic(shuffled) > observed_value
        for shuffled in shuffled_datasets
    ]

    return sum(exceeds_observed)/num

In [112]:
# Seed Python's global RNG, which `permutations` draws from, so that the
# sampled permutations (and hence the reported p-value) are reproducible.
random.seed(0)

# Compute the permutation test statistic.
# NOTE: the misspelled name `pertmutation_test_statistic` is kept as-is
# in case other cells refer to it.
num = int(3e4)
pertmutation_test_statistic = permutation_statistic(
    data=(X, Y),
    builder_statistic=sample_mean_difference,
    num=num
)

# Report out. Adjacent string literals are joined with no separator, so
# the space after the comma has to be written explicitly.
print(
    "For the permutation test, "
    f"the p-value is {pertmutation_test_statistic:.5f}.\n"
    f"(Obtained using {num} permutations.)"
)

For the permutation test,the p-value is 0.00087.
(Obtained using 30000 permutations.)
