In [1]:
import numpy as np
import scipy as sp
import statsmodels as st

# Hypothesis

## Data

In [2]:
data = np.loadtxt('https://github.com/aloctavodia/BAP/raw/refs/heads/master/code/data/chemical_shifts.csv')
data[:10]

array([51.06, 55.12, 53.73, 50.24, 52.05, 56.4 , 48.45, 52.34, 55.65,
       51.49])

## Helpers

In [3]:
h0_or_h1 = lambda pvalue: 'HA' if pvalue < 0.05 else 'H0'

## Normality. Shapiro-Wilk Test

In [10]:
result = sp.stats.shapiro(data)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

ShapiroResult(statistic=0.8256072402000427, pvalue=5.199869974603644e-06)
Result: HA


## Student's T-test. ONE group of scores

In [12]:
result = sp.stats.ttest_1samp(data, popmean=0.5)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

Ttest_1sampResult(statistic=106.23530250014758, pvalue=1.2069687179735237e-57)
Result: HA


In [15]:
result = sp.stats.ttest_1samp(data, popmean=data.mean()-1)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

Ttest_1sampResult(statistic=2.0045736232401867, pvalue=0.05078876575447046)
Result: H0


## Student's T-test. TWO independent samples of scores

In [24]:
rvs = sp.stats.norm.rvs(loc=0, scale=10, size=50)
result = sp.stats.ttest_ind(data, rvs)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

Ttest_indResult(statistic=35.112444863717755, pvalue=1.4036363550261703e-56)
Result: HA


## Median

### Mann-Whitney. Independent data-sets

In [16]:
rvs = sp.stats.norm.rvs(loc=0, scale=1, size=50)
result = sp.stats.mannwhitneyu(data, rvs)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

MannwhitneyuResult(statistic=0.0, pvalue=7.671599742405718e-18)
Result: HA


### Wilcoxon. Related data-sets

In [14]:
data_2 = data*2 - 50
result = sp.stats.mannwhitneyu(data, data_2)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

MannwhitneyuResult(statistic=671.5, pvalue=0.00021796236396992233)
Result: HA


### Mood's Median Test

In [20]:
data_2 = data*2 - 50
rvs = sp.stats.norm.rvs(loc=0, scale=1, size=50)
result = sp.stats.median_test(data, data_2, rvs)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

(79.41666666666666, 5.687087991280189e-18, 51.935, array([[32, 41,  0],
       [16,  7, 50]], dtype=int64))
Result: HA


## Variance

In [7]:
data_2 = data*1.5 - 50
result = sp.stats.fligner(data, data_2)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

FlignerResult(statistic=4.419575689770522, pvalue=0.03552887018892202)
Result: HA


## Proportion

## Correlation

### Pearson for normal data

In [24]:
rvs_1 = sp.stats.norm.rvs(loc=0, scale=1, size=50)
rvs_2 = sp.stats.norm.rvs(loc=-1, scale=2, size=50)
result = sp.stats.pearsonr(rvs_1, rvs_2)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

(0.10002779772543781, 0.4894712437225972)
Result: H0


### Kendall’s tau for ordinal data

In [22]:
data_2 = data*1.5 - 25
result = sp.stats.kendalltau(np.vectorize(int)(data), np.vectorize(int)(data_2))

print(result)
print(f'Result: {h0_or_h1(result[1])}')

KendalltauResult(correlation=0.96994788905227, pvalue=2.149611161436706e-19)
Result: HA
