# Chapter 8: Practical considerations

In [None]:
import numpy as np
import scipy
import scipy.stats
import matplotlib as mpl
import matplotlib.pyplot as plt
from e4e import E4E
e4e = E4E(chapter=8)

## 8.1	Violation of statistical assumptions

### 8.1.1	Violation of the iid assumption

#### INDEPENDENCE

In [None]:
# Ten copies of the same value: the mean is that value and the spread is zero.
ind_meas = np.full(10, 1.5)
print(ind_meas)
print(ind_meas.mean())
print(ind_meas.std())

In [None]:
# Eight repeats of one value followed by two other values, ten entries in all.
repeated_values = [1.5] * 8
other_values = [2.3, 3.1]
ind_meas = np.array(repeated_values + other_values)
print(ind_meas)
print(ind_meas.mean())
# Standard error of the mean computed with n = 10.
print(ind_meas.std() / np.sqrt(10))

In [None]:
# Only the three distinct values; the standard error of the mean uses n = 3.
distinct_values = (1.5, 2.3, 3.1)
ind_meas = np.array(distinct_values)
ind_meas.std() / np.sqrt(3)

In [None]:
# Listing 8.12 Simulate an A/B test and monitor the z score
def z_score_vs_n(num_measurements=99):
    """Simulate an A/B test and return the z score after each measurement.

    Both versions draw their "profit" from the same Uniform(0, 1)
    distribution, so any apparent difference between them is pure noise.

    Parameters
    ----------
    num_measurements : int, optional
        Number of individual measurements taken for each version. The
        default of 99 matches the original ``range(1, 100)`` loop.

    Returns
    -------
    numpy.ndarray
        Array of length ``num_measurements``; element ``n - 1`` is the z
        score computed from the first ``n`` measurements of each version.
        An entry is NaN when the standard error is zero (always the case
        at ``n == 1``), because the z score is undefined there.
    """
    def profit_A():
        return np.random.uniform(0, 1)

    def profit_B():
        return np.random.uniform(0, 1)

    ind_measurements_A = np.empty(num_measurements)
    ind_measurements_B = np.empty(num_measurements)
    z_scores = np.full(num_measurements, np.nan)
    for n in range(1, num_measurements + 1):
        # Draw A before B on every step so the random stream is consumed
        # in the same order for a given seed.
        ind_measurements_A[n - 1] = profit_A()
        ind_measurements_B[n - 1] = profit_B()
        a = ind_measurements_A[:n]
        b = ind_measurements_B[:n]
        delta = b.mean() - a.mean()
        se_delta = np.sqrt(a.std()**2 + b.std()**2) / np.sqrt(n)
        # A single measurement per version has zero standard deviation, so
        # dividing would yield +/-inf (and a RuntimeWarning) that looks like
        # a threshold crossing. Leave NaN there instead.
        if se_delta > 0:
            z_scores[n - 1] = delta / se_delta
    return z_scores

In [None]:
def do_fig(seed):
    """Plot one seeded run of ``z_score_vs_n`` against the 1.64 levels.

    Seeds NumPy's global random generator with ``seed``, plots the z scores
    returned by ``z_score_vs_n``, makes the y range symmetric about zero and
    prints the indices at which the z score exceeds 1.64.
    """
    np.random.seed(seed)
    z_scores = z_score_vs_n()
    plt.plot(z_scores, color=e4e.color_1)
    # presumably draws reference lines at the given y values — confirm against e4e
    for level in (1.64, -1.64):
        e4e.horizontal_line(level)
    # Keep the x range, but centre the y range on zero.
    x_min, x_max, y_min, y_max = plt.axis()
    y_extent = max(abs(y_min), abs(y_max))
    plt.axis([x_min, x_max, -y_extent, y_extent])
    print(np.where(z_scores > 1.64))
    plt.xlabel("individual measurement number (n)")
    plt.ylabel("z score")


do_fig(29)
e4e.save_fig(1)

In [None]:
# Repeat the plot with a different random seed (39).
do_fig(39)
# presumably stores the current figure as figure 2 of this chapter — confirm against e4e.save_fig
e4e.save_fig(2)

In [None]:
def z_score_vs_n_fast(N):
    """Return the running z score of ``b - a`` for ``N`` uniform draws each.

    Draws ``N`` Uniform(0, 1) values for ``a`` and then ``N`` for ``b`` from
    NumPy's global random generator, and uses cumulative sums to compute,
    for every prefix length ``k = 1..N``, ``sqrt(k) * mean / sd`` of the
    first ``k`` differences.

    The first entry is not finite: the running standard deviation of a
    single difference is zero, so that element is a division by zero.
    """
    sample_a = np.random.uniform(size=(N,))
    sample_b = np.random.uniform(size=(N,))
    diff = sample_b - sample_a
    counts = np.arange(1, N + 1)
    running_sum = np.cumsum(diff)
    running_sum_sq = np.cumsum(diff**2)
    running_mean = running_sum / counts
    # Population variance via E[x^2] - E[x]^2 on each prefix.
    running_sd = np.sqrt(running_sum_sq / counts - running_mean**2)
    return np.sqrt(counts) * running_mean / running_sd

def false_positive_rates(N, num_ab_tests=10000, threshold=1.64):
    """Estimate false positive rates with and without early stopping.

    Runs ``num_ab_tests`` simulated A/B tests of ``N`` measurements each via
    ``z_score_vs_n_fast`` and counts how often the z score crosses
    ``threshold``.

    Parameters
    ----------
    N : int
        Number of individual measurements per simulated A/B test.
    num_ab_tests : int, optional
        Number of simulated A/B tests (default 10000, the original value).
    threshold : float, optional
        z-score level treated as a positive result (default 1.64, the
        original value).

    Returns
    -------
    tuple of float
        ``(rate_at_end, rate_with_early_stopping)``: the fraction of tests
        whose final z score exceeds ``threshold``, and the fraction whose
        z score exceeds ``threshold`` in absolute value at any step after
        the first.

    Raises
    ------
    ValueError
        If ``num_ab_tests`` is not positive (the rates would be undefined).
    """
    if num_ab_tests < 1:
        raise ValueError("num_ab_tests must be a positive integer")

    fp_at_end = 0
    fp_with_early_stopping = 0
    for _ in range(num_ab_tests):
        z = z_score_vs_n_fast(N)
        if z[-1] > threshold:
            fp_at_end += 1
        # Skip z[0]: it comes from a single pair of measurements, where the
        # running standard deviation is zero and the z score is not finite.
        # NOTE(review): the end-of-test check above is one-sided (z > t)
        # while this check is two-sided (|z| > t), so the two rates do not
        # use the same rejection region — confirm this is intended.
        if np.any(np.abs(z[1:]) > threshold):
            fp_with_early_stopping += 1
    return fp_at_end / num_ab_tests, fp_with_early_stopping / num_ab_tests
        

In [None]:
# Fix the seed so the estimate is reproducible, then show both rates for N = 100.
np.random.seed(17)
false_positive_rates(100)

In [None]:
# Collect (N, rate at end, rate with early stopping) for a range of test sizes.
np.random.seed(17)
fpr = []
for N in (10, 30, 100, 300, 1000, 3000, 10000):
    fp_0, fp_N = false_positive_rates(N)
    row = (N, fp_0, fp_N)
    print(*row)
    fpr.append(row)

In [None]:
# Plot both false positive rates against N on a logarithmic x axis.
# Columns of fpr: 0 = N, 1 = rate at end, 2 = rate with early stopping.
fpr = np.array(fpr)
n_values = fpr[:, 0]
plt.semilogx(n_values, fpr[:, 1], '-', color=e4e.color_1)
plt.semilogx(n_values, fpr[:, 2], '.--', color=e4e.color_2)
plt.xlabel('N')
plt.ylabel('false positive rate')
plt.legend(['waiting until end', 'early stopping'])
e4e.save_fig(3)

## 8.3	Control family-wise error

### 8.3.1	Cherry-picking increases the false positive rate

In [None]:
# Plot 1 - 0.95**M for M = 1..29.
M = np.arange(1, 30)
p_any = 1 - .95**M
plt.plot(M, p_any, 'o--', color=e4e.color_1)
plt.xlabel('M')
plt.ylabel('p_any')
e4e.save_fig(4)

In [None]:
# Compare the uncorrected rate with the rate when each of the M tests
# uses the per-test level 0.05 / M.
p_any_uncorrected = 1 - .95**M
p_any_bonferroni = 1 - (1 - .05/M)**M
plt.plot(M, p_any_uncorrected, 'o--', color=e4e.color_1)
plt.plot(M, p_any_bonferroni, '.:', color=e4e.color_2)
plt.xlabel('M')
plt.ylabel('p_any')
plt.legend(['family-wise false positive rate', 'family-wise false positive rate,\nwith Bonferroni correction'])
# Fixed view: x from 0 to 17, y from 0 to 0.6.
plt.axis([0, 17, 0, .6])
e4e.save_fig(5)

In [None]:
import scipy.stats as ss

# Frozen standard normal distribution.
z = ss.norm()

# z.ppf already broadcasts over NumPy arrays, so it is called directly instead
# of through an np.vectorize wrapper (which loops element by element in
# Python). The old name is kept as an alias in case other cells use it.
vppf = z.ppf

# 0.84 is close to z.ppf(0.80); presumably the z value for 80% power — confirm.
# The second term is the normal quantile at the per-test level 0.05 / M.
N = (.84 + z.ppf(1 - 0.05/M))**2
# Express N relative to its value for the first entry of M.
N = N / N[0]

plt.plot(M, N, 'o--', color=e4e.color_1)
plt.xlabel('M')
plt.ylabel('N (relative)')
e4e.save_fig(6)

## 8.4	Be aware of common biases

### 8.4.1	Confounder bias

### 8.4.2	Small-sample bias

### 8.4.3	Optimism bias

### 8.4.4	Experimenter bias