# Evaluation of hypothesis testing

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

## Multiple testing

Hypothetical null distribution.
Feel free to try any null distribution; examples are given below.

In [None]:
## Null distribution of the test statistic
#
# Alternative choices to experiment with (uncomment one and comment out
# the active assignment below):
#
# any_null_dist = stats.t(df=26, loc=0, scale=1)
# any_null_dist = stats.norm(loc=0.5, scale=3)

# Active choice: chi-squared with 3 degrees of freedom, unshifted and unscaled.
any_null_dist = stats.chi2(3, loc=0, scale=1)

In [None]:
## Corresponding p values

# Draw 1000 test statistics from the null distribution (fixed seed, so the
# draws are reproducible) and evaluate the null density at each draw.
_stat_random = any_null_dist.rvs(size=1000, random_state=0)
_pdf_random = any_null_dist.pdf(_stat_random)


# One-tailed (upper-tail) p value. The survival function is the same
# quantity as 1 - cdf but avoids cancellation error far out in the tail.
_pvals_onetail = any_null_dist.sf(_stat_random)

# Two-tailed p value, equal-tailed form: twice the smaller of the two tail
# probabilities. For a null that is symmetric about 0 this is identical to
# 2 * (1 - cdf(|t|)); unlike that shortcut it stays within [0, 1] for
# skewed or shifted nulls such as the chi-squared example.
_pvals_twotail = 2 * np.minimum(any_null_dist.cdf(_stat_random), _pvals_onetail)

#### Null distribution (of test statistic)

In [None]:
# Null density evaluated at every simulated test statistic.
# Markers only, no connecting line between the randomly drawn points.
plt.plot(_stat_random, _pdf_random, marker='o', linestyle='none')
plt.ylabel('Probability density function')
plt.xlabel('test statistic')
plt.show()

#### Null distribution of p value

In [None]:
# Histogram (20 bins) of the one-tailed p values of the simulated statistics.
plt.hist(_pvals_onetail, bins=20)
plt.xlabel('One-tailed p value')
plt.ylabel('Frequency')
plt.title('Distribution of p value under the null hypothesis')
plt.show()

In [None]:
# Histogram (20 bins) of the two-tailed p values of the simulated statistics.
plt.hist(_pvals_twotail, bins=20)
plt.xlabel('Two-tailed p value')
plt.ylabel('Frequency')
plt.title('Distribution of p value under the null hypothesis')
plt.show()

### Minimal p values in 10 tests

In [None]:
# Lay the one-tailed p values out as 10 rows and take the column-wise
# minimum: every entry is the smallest p value among a group of 10 tests.
p_min_in10 = _pvals_onetail.reshape(10, -1).min(axis=0)

In [None]:
# Histogram (20 bins) of the per-group minimum p values; the red vertical
# line marks p = 0.05.
plt.hist(p_min_in10, bins=20)
plt.axvline(x=0.05, color='r')
plt.xlabel('One-tailed p value (minimal in 10 tests)')
plt.ylabel('Frequency')
plt.title('Distribution of min p value in 10 tests')
plt.show()

In [None]:
# Fraction of the 10-test groups whose smallest p value falls below 0.05.
print('%.2f of tests with min(p) < 0.05' % ((p_min_in10 < 0.05).mean(),))

## Power analysis

### Relationship between power and other factors

In [None]:
def power_analysis(n_sample, effect_size, alpha=0.05):
    """Plot and return the power of a one-sided two-sample t test.

    Two groups of ``n_sample`` observations each are compared with an
    upper-tailed test at level ``alpha``. The null distribution of the test
    statistic is Student's t with ``2 * n_sample - 2`` degrees of freedom;
    under the alternative it is noncentral t with the same degrees of
    freedom and noncentrality ``effect_size * sqrt(n_sample / 2)``.

    The curves are drawn on the current matplotlib axes. The function does
    not create a figure or call ``plt.show()``, so it can be used inside
    subplots.

    Parameters
    ----------
    n_sample : int
        Number of observations in each group (must be at least 2).
    effect_size : float
        Standardised effect size (difference in means divided by the
        standard deviation).
    alpha : float, optional
        One-sided significance level. Default is 0.05.

    Returns
    -------
    float
        Probability that the test statistic exceeds the critical value
        under the alternative.

    Raises
    ------
    ValueError
        If ``n_sample`` is smaller than 2 (no degrees of freedom left).
    """
    if n_sample < 2:
        raise ValueError('n_sample must be at least 2 per group')

    dof = 2 * n_sample - 2
    # effect_size / sqrt(1/n + 1/n): where the statistic is centred when the
    # effect is real.
    noncentrality = effect_size * np.sqrt(n_sample / 2)

    null_distr = stats.t(df=dof)
    # Noncentral t rather than a shifted central t: the shifted curve is only
    # an approximation and is noticeably off for small samples.
    eff_distr = stats.nct(df=dof, nc=noncentrality)

    # Critical value of the upper-tailed test, and the mass of the
    # alternative distribution beyond it.
    stat_threshold = null_distr.isf(alpha)
    power = eff_distr.sf(stat_threshold)

    # Always show at least [-3, 3), widened so that the bulk of the
    # alternative distribution stays visible for large effects or samples.
    x_lo = min(-3.0, noncentrality - 3.0)
    x_hi = max(3.0, noncentrality + 3.0)
    xx = np.arange(x_lo, x_hi, 0.01)

    plt.plot(xx, null_distr.pdf(xx), label='Null')
    plt.plot(xx, eff_distr.pdf(xx), label='Test statistic')
    # The legend reports the alpha actually used, not a fixed 0.05.
    plt.axvline(x=stat_threshold, color='r', label='Alpha=%g' % alpha)
    plt.xlabel('test statistic')
    plt.ylabel('Probability density function')
    plt.title('Power: %.3f with sample size=%d' % (power, n_sample))
    plt.legend()

    return power

In [None]:
# Standardised effect sizes and sample sizes to compare in the power plots.
effect_sizes = np.array((0.6, 1.2))
n_samples = np.array((3, 5, 15, 40))

In [None]:
# Single worked example using the first entry of each array:
#   sample size (each group):    3
#   effect size (standardised):  0.6
#   significance level (alpha):  0.05 (the power_analysis default)

fig = plt.figure(dpi=100)
power_analysis(n_sample=n_samples[0], effect_size=effect_sizes[0])
plt.show()

In [None]:
# One panel per (effect size, sample size) pair: rows follow effect_sizes,
# columns follow n_samples. The grid shape is derived from the arrays so the
# figure stays correct when either list is edited.
n_rows, n_cols = len(effect_sizes), len(n_samples)

fig = plt.figure(figsize=(16, 7))
for j, effect_size in enumerate(effect_sizes):
    for i, n_sample in enumerate(n_samples):
        # Subplot indices are 1-based and run row by row.
        plt.subplot(n_rows, n_cols, j * n_cols + i + 1)
        power_analysis(n_sample, effect_size)

plt.tight_layout()
plt.show()

### Calculate power with statsmodels

https://www.statsmodels.org/dev/generated/statsmodels.stats.power.TTestIndPower.html

In [None]:
from statsmodels.stats.power import TTestIndPower

# Cross-check with statsmodels: 3 observations per group, standardised
# effect size 0.6, alpha 0.05.
# alternative='larger' selects the upper-tailed test, matching the upper-tail
# rejection region used by power_analysis. With a positive effect size,
# 'smaller' would instead report the chance of rejecting in the lower tail.
analysis = TTestIndPower()
analysis.power(effect_size=0.6, nobs1=3,
               alpha=0.05, alternative='larger')

### Estimate required minimum sample size

In [None]:
from statsmodels.stats.power import TTestIndPower

In [None]:
# Inputs for the sample-size calculation.

# Standardised effect size. The divisor 0.162 is the population standard
# deviation (pop_std); the numerator is presumably the raw effect -- confirm.
standard_effect = 0.1 / 0.162
# standard_effect = 0.52 / 0.162
print('Standardised effect size: %.3f' % (standard_effect,))

# Target power and one-sided significance level.
power = 0.9
alpha = 0.05

# nobs1 is passed as None, so it is the quantity solve_power solves for.
analysis = TTestIndPower()
result = analysis.solve_power(
    effect_size=standard_effect,
    nobs1=None,
    alpha=alpha,
    power=power,
    alternative='larger',
)

# Round up to a whole number of observations.
print('Required sample Size: %d' % np.ceil(result))