# Multiple Testing

### Loading Libraries

In [None]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt

# StatsModels
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests as mult_test

# SciPy
from scipy.stats import (ttest_1samp, ttest_rel, ttest_ind, t as t_dbn)

# ISLP
from ISLP import load_data

### Review of Hypothesis Tests

In [None]:
# Seeded generator so the simulated data set is reproducible.
rng = np.random.default_rng(12)

# True means of the 100 variables: 0.5 for the first 50, 0 for the last 50.
true_mean = np.concatenate([np.full(50, 0.5), np.zeros(50)])

# 10 standard-normal observations per variable; broadcasting the length-100
# mean vector over the rows shifts each column by its true mean.
X = rng.standard_normal((10, 100)) + true_mean

In [None]:
# One-sample t-test of H0: mean = 0 on the first variable (column 0 of X).
first_variable = X[:, 0]
result = ttest_1samp(first_variable, popmean=0)
result.pvalue

In [None]:
# Test H0: mean = 0 for every variable at once. With axis=0, ttest_1samp
# runs one test per column of X, so no column count needs to be hard-coded.
p_values = np.asarray(ttest_1samp(X, 0, axis=0).pvalue)

# Classify the p-values a single time, after all of them are available:
# values in (0, 0.05] reject H0, values in (0.05, 1] do not.
decision = pd.cut(p_values,
                  [0, 0.05, 1],
                  labels=['Reject H0',
                          'Do not reject H0'])

# Ground truth per variable: True where the null (mean == 0) actually holds.
# The explicit category order puts True before False in any tabulation.
truth = pd.Categorical(true_mean == 0,
                       categories=[True, False],
                       ordered=True)

In [None]:
# Confusion table: test decisions (rows) against whether H0 is true (columns).
pd.crosstab(index=decision,
            columns=truth,
            rownames=['Decision'],
            colnames=['H0'])

In [None]:
# Repeat the simulation with a stronger signal: the first 50 variables now
# have true mean 1, the last 50 still have mean 0.
true_mean = np.array([1]*50 + [0]*50)

# Fresh draws from the same generator (rng is created in an earlier cell),
# shifted column-wise by the true means.
X = rng.standard_normal((10, 100))
X += true_mean[None, :]

# One one-sample t-test per column (axis=0); building the array here means
# this cell does not depend on a p_values buffer left over from earlier.
p_values = np.asarray(ttest_1samp(X, 0, axis=0).pvalue)

# Classify once, after all p-values are known: (0, 0.05] rejects H0.
decision = pd.cut(p_values,
                  [0, 0.05, 1],
                  labels=['Reject H0',
                          'Do not reject H0'])

# True where the null hypothesis (mean == 0) actually holds.
truth = pd.Categorical(true_mean == 0,
                       categories=[True, False],
                       ordered=True)

# Decisions (rows) versus truth (columns).
pd.crosstab(decision,
            truth,
            rownames=['Decision'],
            colnames=['H0'])

### Family-Wise Error Rate

In [None]:
# Number of hypotheses m on a grid from 1 to 501 (np.linspace defaults to
# 50 evenly spaced points).
m = np.linspace(1, 501)

fig, ax = plt.subplots()

# For m independent tests each run at level alpha, the family-wise error
# rate is 1 - (1 - alpha)^m. A plain loop is used because the plotting calls
# are wanted for their side effects, not for their return values.
for level in (0.05, 0.01, 0.001):
    ax.plot(m,
            1 - (1 - level)**m,
            label=r'$\alpha =%s$' % str(level))

ax.set_xscale('log')
ax.set_xlabel('Number of Hypotheses')
ax.set_ylabel('Family-Wise Error Rate')
ax.legend()

# Dashed reference line at a family-wise error rate of 0.05.
ax.axhline(0.05, c='k', ls='--')

ax.grid(True)
plt.show()

In [None]:
# Load the Fund data set and keep only its first five columns.
Fund = load_data('Fund')
fund_mini = Fund.iloc[:, :5]

# One-sample t-test of H0: mean = 0 for each of the five columns, collected
# in column order.
fund_mini_pvals = np.array([
    ttest_1samp(column, 0).pvalue
    for _, column in fund_mini.items()
])

fund_mini_pvals

In [None]:
# Bonferroni correction of the five p-values. Only the first two returned
# items are kept: the reject flags and the adjusted p-values.
bonferroni_out = mult_test(fund_mini_pvals, method="bonferroni")
reject, bonf = bonferroni_out[0], bonferroni_out[1]

reject

In [None]:
# Show the adjusted p-values next to the raw p-values multiplied by 5 and
# capped at 1, for comparison.
(bonf, np.minimum(5 * fund_mini_pvals, 1))

In [None]:
# Holm's step-down correction at level 0.05; display only the first two
# returned items (reject flags and adjusted p-values).
holm_out = mult_test(fund_mini_pvals, alpha=0.05, method="holm")
holm_out[:2]

In [None]:
# Column-wise mean of each of the five selected columns.
fund_mini.mean(axis=0)

In [None]:
# Paired t-test comparing the Manager1 and Manager2 columns row by row.
manager1_returns = fund_mini['Manager1']
manager2_returns = fund_mini['Manager2']
ttest_rel(manager1_returns, manager2_returns).pvalue

In [None]:
# Stack the five columns end to end into one long vector of observations.
returns = np.concatenate([column.to_numpy() for _, column in fund_mini.items()])

# Matching group labels: 1 repeated 50 times, then 2 repeated 50 times, ...
# up to 5, so each observation is tagged with the column it came from.
managers = np.repeat(np.arange(1, 6), 50)

# Tukey's HSD for all pairwise comparisons between the five groups.
tukey = pairwise_tukeyhsd(returns, managers)
print(tukey.summary())

In [None]:
# Draw the simultaneous-comparison plot from the Tukey result on a square
# 8x8 figure.
fig, ax = plt.subplots(figsize=(8, 8))
tukey.plot_simultaneous(ax=ax)

ax.grid(True)
plt.show()

### False Discovery Rate

In [None]:
# One-sample t-test of H0: mean = 0 for every column of Fund. The array is
# sized by the data itself rather than by a hard-coded 2000, so a different
# column count can neither overflow the buffer nor leave uninitialised
# entries behind.
fund_pvalues = np.array([
    ttest_1samp(column, 0).pvalue
    for _, column in Fund.items()
])

In [None]:
# Benjamini-Hochberg adjustment; the second returned item holds the adjusted
# p-values (q-values).
bh_out = mult_test(fund_pvalues, method="fdr_bh")
fund_qvalues = bh_out[1]

# Peek at the first ten q-values.
fund_qvalues[:10]

In [None]:
# Number of q-values at or below 0.1.
np.sum(fund_qvalues <= 0.1)

In [None]:
# Number of raw p-values at or below the Bonferroni-style cutoff 0.1 / 2000.
bonferroni_cutoff = 0.1 / 2000
np.sum(fund_pvalues <= bonferroni_cutoff)

In [None]:
# Manual Benjamini-Hochberg procedure at FDR level q.
sorted_ = np.sort(fund_pvalues)
m = fund_pvalues.shape[0]
q = 0.1

# 0-based positions j whose ordered p-value lies below the BH line
# q * (j + 1) / m.
below_line = np.flatnonzero(sorted_ < q * np.arange(1, m + 1) / m)

if below_line.size > 0:
    # Step-up rule: every hypothesis up to and including the largest
    # qualifying position is rejected. The cutoff p-value itself must be
    # included, hence `<=` and `last + 1`; a strict `<` together with
    # `np.arange(last)` would drop one rejection.
    last = below_line.max()
    selected_ = fund_pvalues <= sorted_[last]
    sorted_set_ = np.arange(last + 1)
else:
    # Nothing rejected. Keep array types (not lists) so that later code can
    # still use boolean masking and arithmetic such as `sorted_set_ + 1`.
    selected_ = np.zeros(m, dtype=bool)
    sorted_set_ = np.array([], dtype=int)

In [None]:
# Ordered p-values against their rank on log-log axes, with the rejected
# ones highlighted and the Benjamini-Hochberg line overlaid.
fig, ax = plt.subplots()
ax.scatter(np.arange(1, sorted_.shape[0] + 1),
           sorted_,
           s=10)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel('P-Value')
ax.set_xlabel('Index')

# Coerce to an integer index array so this also works when nothing was
# rejected and sorted_set_ is an empty list (`[] + 1` would raise TypeError).
rejected_idx = np.asarray(sorted_set_, dtype=int)
ax.scatter(rejected_idx + 1, sorted_[rejected_idx], c='r', s=20)

# BH line y = q * x / m. Both anchor points are strictly positive and lie
# exactly on that line; (0, 0) has no finite position on log axes, so
# anchoring there would make the drawn slope only approximate.
ax.axline((1, q / m), (m, q), c='k', ls='--', linewidth=3)

ax.grid(True)
plt.show()