## Hypothesis testing

In [5]:
# import numpy
import numpy as np

# fix the global NumPy seed so every run draws the same samples
np.random.seed(42)

# number of daily observations per store (one year)
history = 365

# store A: draw `history` daily sales from a normal distribution
mean_A, std_A = 20, 5
shop_A_sales = np.random.normal(loc=mean_A, scale=std_A, size=history)

# store B: same spread, slightly lower mean; drawn after store A so the
# random stream is consumed in the same order on every run
mean_B, std_B = 19.5, 5
shop_B_sales = np.random.normal(loc=mean_B, scale=std_B, size=history)

## Set significance level

In [6]:
# significance level: H0 is rejected when the p-value is below this threshold
alpha = 0.05

## Compute the means

In [7]:
# sample means of the daily sales, computed once and reused below
sample_mean_A = np.mean(shop_A_sales)
sample_mean_B = np.mean(shop_B_sales)

# observed test statistic: difference between the two sample means (A minus B)
observed_means_diff = sample_mean_A - sample_mean_B

# show the store A mean, the store B mean and their difference, one per line
print(sample_mean_A, sample_mean_B, observed_means_diff, sep='\n')

20.04973201106029
19.309929401404304
0.7398026096559853


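### Because the mean sales of store A are close to the mean sales of store B, and their standard deviations are equal, it is hard to tell by eye whether the two stores really sell the same amount. We therefore test the null hypothesis H0 (both stores have the same mean daily sales) against the alternative that the means differ.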

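## Pool the samples - under H0 both stores' sales come from the same population, so they can be combined into one array. By the Central Limit Theorem (CLT), the average of our samples' means approximates the population mean.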

In [9]:
# pool the two stores into one array: store A's sales first, then store B's
both_sales = np.concatenate([shop_A_sales, shop_B_sales])

### Perform a permutation on both_sales and re-create the two stores' sales from the permuted data. A permutation is a random reordering of the entries in an array.

In [10]:
# randomly reorder the pooled sales (returns a shuffled copy)
sales_perm = np.random.permutation(both_sales)

# cut the shuffled array at the size of the store A sample: the first part is
# the store A replicate, the remainder is the store B replicate
perm_shop_A_sales, perm_shop_B_sales = np.split(sales_perm, [len(shop_A_sales)])

### Compute the difference between the means of the permutation replicates.

In [11]:
# difference in means (A minus B) for this single permutation replicate
print(np.mean(perm_shop_A_sales) - np.mean(perm_shop_B_sales))

0.21098789154327235


### We can see that the difference in means of the permutation replicates (about 0.21) is not the same as the observed difference in means of the original sales (about 0.74). But this was only one permutation. Let's try 1,000 different permutations and store the differences of the permutation replicates' means in a list.

In [12]:
# number of permutation replicates to draw
n_permutations = 1000

# size of the store A sample; computed once instead of on every iteration
n_shop_A = len(shop_A_sales)


def perm_means_diff(pooled, n_first):
    """Return the difference in means for one random permutation of `pooled`.

    The pooled array is shuffled with NumPy's global random generator, the
    first `n_first` entries form the first replicate and the remaining
    entries form the second replicate.

    Parameters
    ----------
    pooled : array-like
        Observations of both groups combined.
    n_first : int
        Number of entries assigned to the first replicate.

    Returns
    -------
    float
        Mean of the first replicate minus mean of the second replicate.
    """
    # local names only: the single-permutation variables created in an
    # earlier cell are left untouched, so that cell's output stays valid
    shuffled = np.random.permutation(pooled)
    return shuffled[:n_first].mean() - shuffled[n_first:].mean()


# list of the differences in means, one entry per permutation replicate
perm_repl_means = [
    perm_means_diff(both_sales, n_shop_A) for _ in range(n_permutations)
]

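## Compute the p-value: the fraction of permutation replicates whose absolute difference in means is at least as large as the observed difference.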

In [14]:
# compute the two-sided p-value: the share of permutation replicates whose
# difference in means is at least as extreme (in absolute value) as the
# observed one. The observed difference is wrapped in np.abs as well, so the
# comparison stays correct when the observed difference is negative
# (store B mean above store A mean); without it p would always be 1.0 then.
p = np.sum(np.abs(perm_repl_means) >= np.abs(observed_means_diff)) / len(perm_repl_means)

# print the result
print('p-value =', p)

p-value = 0.043


In [15]:
# final decision: reject H0 only when the p-value is strictly below the
# significance level alpha; otherwise H0 is not rejected
print('H0 is rejected.' if p < alpha else 'H0 is not rejected.')

H0 is rejected.
