In [1]:
import numpy as np
import scipy as sp
import statsmodels as st

# Hypothesis testing

## Data

In [2]:
# Chemical-shift measurements, read straight from the aloctavodia/BAP GitHub repository.
DATA_URL = 'https://github.com/aloctavodia/BAP/raw/refs/heads/master/code/data/chemical_shifts.csv'
data = np.loadtxt(DATA_URL)

# Preview only the first ten values instead of dumping the whole array.
data[:10]

array([51.06, 55.12, 53.73, 50.24, 52.05, 56.4 , 48.45, 52.34, 55.65,
       51.49])

## Helpers

In [5]:
def h0_or_h1(pvalue, alpha=0.05):
    """Label the outcome of a hypothesis test from its p-value.

    Parameters
    ----------
    pvalue : float
        p-value returned by a statistical test.
    alpha : float, optional
        Significance level the p-value is compared against. Defaults to
        0.05, the threshold this notebook uses throughout.

    Returns
    -------
    str
        ``'HA'`` when ``pvalue < alpha`` (the null hypothesis is rejected in
        favour of the alternative), otherwise ``'H0'`` (the null hypothesis
        is not rejected).
    """
    # Strict inequality: a p-value exactly equal to alpha does not reject H0.
    return 'HA' if pvalue < alpha else 'H0'

## Normality. Shapiro-Wilk Test

In [10]:
# Shapiro-Wilk test: the null hypothesis is that `data` was drawn from a
# normal distribution.
result = sp.stats.shapiro(data)
verdict = h0_or_h1(result.pvalue)

# Show the raw test result first, then the decision derived from its p-value.
print(result, f'Result: {verdict}', sep='\n')

ShapiroResult(statistic=0.8256072402000427, pvalue=5.199869974603644e-06)
Result: HA


## Student's T-test. ONE group of scores

In [12]:
# One-sample t-test: the null hypothesis is that the mean of the population
# behind `data` equals 0.5.
result = sp.stats.ttest_1samp(data, popmean=0.5)
verdict = h0_or_h1(result.pvalue)

# Show the raw test result first, then the decision derived from its p-value.
print(result, f'Result: {verdict}', sep='\n')

Ttest_1sampResult(statistic=106.23530250014758, pvalue=1.2069687179735237e-57)
Result: HA


In [15]:
# One-sample t-test against a hypothesised mean placed exactly one unit below
# the sample mean.
shifted_mean = data.mean() - 1
result = sp.stats.ttest_1samp(data, popmean=shifted_mean)
verdict = h0_or_h1(result.pvalue)

# Show the raw test result first, then the decision derived from its p-value.
print(result, f'Result: {verdict}', sep='\n')

Ttest_1sampResult(statistic=2.0045736232401867, pvalue=0.05078876575447046)
Result: H0


## Student's T-test. TWO independent samples of scores

In [24]:
# Comparison sample: 50 draws from a normal distribution with mean 0 and
# standard deviation 10. The fixed `random_state` makes the draw, and
# therefore the test statistic and p-value below, identical on every
# Restart & Run All.
rvs = sp.stats.norm.rvs(loc=0, scale=10, size=50, random_state=42)

# Two-sample t-test for independent samples: the null hypothesis is that
# `data` and `rvs` come from populations with equal means.
result = sp.stats.ttest_ind(data, rvs)

print(result)
print(f'Result: {h0_or_h1(result[1])}')

Ttest_indResult(statistic=35.112444863717755, pvalue=1.4036363550261703e-56)
Result: HA
