# Chapter 14 Nonparametric Statistical Methods

In [1]:
import math
import itertools

import polars as pl
from polars import col, lit
from scipy import stats
import numpy as np
import altair as alt

# Fixed seed so that any cell drawing from RNG gives the same numbers after
# Restart Kernel -> Run All; change SEED to explore a different random stream.
SEED = 14
RNG = np.random.default_rng(SEED)
DATA = {}  # input data, keyed by exercise number (e.g. '14.1')
ANS = {}   # calculation results

## 14.1 Inferences for Single Samples

In scipy:
- The sign test is implemented as [stats.quantile_test](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.quantile_test.html).
- The Wilcoxon signed rank test is implemented as [stats.wilcoxon](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html).

### Ex 14.1

In [2]:
# Ten observations for Ex 14.1, stored under the exercise number.
DATA['14.1'] = np.array([
    37, 26, 31, 35, 32,
    32, 27, 31, 34, 36,
])

#### (a)

For the exact p-value:

In [3]:
# Sign test as a one-sided ('greater') quantile test: p=0.5 picks the median,
# q=30 is the hypothesised value it is compared against.
stats.quantile_test(
    DATA['14.1'],
    q=30,
    p=0.5,
    alternative='greater',
)

QuantileTestResult(statistic=2, statistic_type=1, pvalue=0.0546875)

which can be calculated directly using the binomial CDF.

In [36]:
# Same p-value by hand: lower-tail Binomial(n, 0.5) probability evaluated at
# the number of observations that are <= 30.
stats.binom(n=DATA['14.1'].size, p=0.5).cdf((DATA['14.1'] <= 30).sum())

0.0546875

Or with normal approximation with continuity correction:

In [29]:
# Normal approximation to the binomial count: mean n/2, variance n/4.
# The +0.5 on the count is the continuity correction.
stats.norm.cdf(
    np.count_nonzero(DATA['14.1'] <= 30) + 0.5,
    loc=DATA['14.1'].size * 0.5,
    scale=np.sqrt(DATA['14.1'].size * 0.25),
)

0.056923149003329024

Either way, the p-value exceeds α = 0.05, so we cannot reject $H_0$.

#### (b)

For the exact test:

In [36]:
# Signed rank test on the deviations from the hypothesised value 30,
# requesting the exact null distribution.
stats.wilcoxon(
    DATA['14.1'] - 30,
    method='exact',
    alternative='greater',
)

WilcoxonResult(statistic=43.5, pvalue=0.0654296875)

Normal approximation with continuity correction:

In [38]:
# Same test, but with the normal approximation and continuity correction.
stats.wilcoxon(
    DATA['14.1'] - 30,
    method='approx',
    correction=True,
    alternative='greater',
)

WilcoxonResult(statistic=43.5, pvalue=0.05671152359300298)

Either way, the p-value exceeds α = 0.05, so we cannot reject $H_0$.

### Ex 14.2

In [42]:
# Paired measurements for Ex 14.2: one row per pair (1..12), with the
# treated and control values side by side.
DATA['14.2'] = pl.DataFrame(
    {
        'pair': range(1, 13),
        'treated': [14, 26, 2, 4, -5, 14, 3, -1, 1, 6, 3, 4],
        'control': [8, 18, -7, -1, 2, 9, 0, -4, 13, 3, 3, 3],
    }
)
DATA['14.2']

pair,treated,control
i64,i64,i64
1,14,8
2,26,18
3,2,-7
4,4,-1
5,-5,2
6,14,9
7,3,0
8,-1,-4
9,1,13
10,6,3


In [54]:
# Sign test on the paired differences (treated - control) against a median of 0.
# q=0 and p=0.5 are quantile_test's defaults, spelled out to match Ex 14.1.
# NOTE(review): pair 11 has a zero difference (3 vs 3) and is counted in the
# statistic (3 = two negative differences + one zero). The conventional sign
# test drops zero differences first -- confirm which convention is intended.
stats.quantile_test(
    DATA['14.2'].get_column('treated') - DATA['14.2'].get_column('control'),
    q=0,
    p=0.5,
    alternative='greater',
)

QuantileTestResult(statistic=3, statistic_type=1, pvalue=0.072998046875)

In [58]:
# Paired signed rank test: wilcoxon works on the differences x - y,
# here treated - control, with a one-sided 'greater' alternative.
stats.wilcoxon(
    x=DATA['14.2']['treated'],
    y=DATA['14.2']['control'],
    alternative='greater',
)

WilcoxonResult(statistic=47.0, pvalue=0.10604514898621259)

With both the sign test and the Wilcoxon signed rank test, the p-value is above 0.05, so we cannot conclude that VB improves the IQ at the 0.05 significance level. The Wilcoxon test gives an even less significant p-value because the two negative differences are large in magnitude relative to most of the positive ones -- once magnitudes are taken into account, a true median of 0 looks somewhat more plausible than it does when only the signs are considered.

In [67]:
# Strip plot of the paired differences (treated - control): one tick per pair.
(
    alt.Chart(
        DATA['14.2'].select(
            (col('treated') - col('control')).alias('diff')
        )
    )
    .mark_tick()
    .encode(x=alt.X('diff:Q', axis=alt.Axis(grid=False)))
)

## 14.5 *Resampling Methods

The inherent error of resampling could be far less serious than making wrong and often unverifiable assumptions about the population distribution.

- permutation test: draw from samples *without replacement*
- bootstrap method: draw from samples *with replacement*
- jackknife method: delete one observation at a time.

### Ex 14.34

In [2]:
# The two samples compared in Ex 14.34 (four and three observations).
method_1 = np.array([
    12.0129,
    12.0072,
    12.0064,
    12.0054,
])
method_2 = np.array([
    12.0318,
    12.0246,
    12.0069,
])

In [6]:
def get_statistic(m1: np.ndarray, m2: np.ndarray, axis: "int | None" = None) -> "float | np.ndarray":
    """Difference of sample means, ``mean(m1) - mean(m2)``.

    Parameters
    ----------
    m1, m2 : np.ndarray
        The two samples being compared.
    axis : int or None, optional
        Axis along which each mean is taken. The default ``None`` averages
        over all elements, so calls that do not pass ``axis`` behave exactly
        as before. Accepting ``axis`` lets callers that evaluate many
        resamples at once (e.g. ``scipy.stats.permutation_test`` in its
        vectorized mode) compute the statistic for a whole batch in one call.

    Returns
    -------
    float or np.ndarray
        A scalar when ``axis`` is ``None`` or the inputs are 1-D; otherwise
        an array with ``axis`` reduced away.
    """
    return np.mean(m1, axis=axis) - np.mean(m2, axis=axis)

In [12]:
# Two-sided permutation test on the difference of means.
# n_resamples=np.inf requests every distinct reassignment of the seven
# observations to the two groups rather than a random subset.
res = stats.permutation_test(
    (method_1, method_2),
    get_statistic,
    n_resamples=np.inf,
    alternative='two-sided',
)
res.pvalue

0.17142857142857143

In [22]:
# Histogram of the permutation null distribution of the mean difference.
(
    alt.Chart(pl.DataFrame({'diff': res.null_distribution}))
    .mark_bar()
    .encode(
        x=alt.X('diff', bin=True),
        y=alt.Y('count()'),
    )
)