In [None]:
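# The t-statistic is the difference between two means divided by the standard error,
# i.e. the ratio of [the difference between the means] to [the standard error].

# Test statistic when the population variance is unknown
#        x̄ - μ
#  t =  -------     ※ t-distribution ( degrees of freedom = n-1 )
#       s / √n
# where x̄ = sample mean, μ = hypothesised population mean,
#       s = sample standard deviation, n = sample size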

In [1]:
import numpy as np
from scipy import stats

In [2]:
# Seed NumPy's global RNG so the simulated sample below is reproducible.
np.random.seed(seed=42)
# Simulated heights: 20 draws from a normal distribution (mean 172, sd 5).
x = np.random.normal(172, 5, size=(20,))
x

array([174.48357077, 171.30867849, 175.23844269, 179.61514928,
       170.82923313, 170.82931522, 179.89606408, 175.83717365,
       169.65262807, 174.71280022, 169.68291154, 169.67135123,
       173.20981136, 162.43359878, 163.37541084, 169.18856235,
       166.9358444 , 173.57123666, 167.45987962, 164.93848149])

In [3]:
# Section header for the one-sample t-test examples that follow.
print("# 1-Sample T-test(단일 표본 t-검정)")

# 1-Sample T-test(단일 표본 t-검정)


In [5]:
# One-sample t-test of the simulated heights against a hypothesised mean of 175.
stats.ttest_1samp(x, popmean=175)  # H0: the population mean is 175

Ttest_1sampResult(statistic=-3.5929686576289925, pvalue=0.0019392204848695247)

In [8]:
# Same test against a hypothesised mean of 173; keep the result object so its
# fields can be formatted explicitly.
ret = stats.ttest_1samp(x, popmean=173)  # H0: the population mean is 173
print("result: {:f} {:f}".format(ret.statistic, ret.pvalue))

result: -1.729634 0.099909


In [59]:
from IPython import display
# Show the remote SVG at this Wikimedia math-render URL as the cell output.
# NOTE(review): presumably the t-statistic formula used in this notebook — confirm
# against the rendered image.
display.Image(url="https://wikimedia.org/api/rest_v1/media/math/render/svg/925bdf4f11c6938c078955ff9558de9995ea3660")

In [54]:
# Alternative: reproduce the one-sample test by hand from summary statistics.
# stats.describe returns (nobs, (min, max), mean, variance, skewness, kurtosis).
n, (smin, smax), sm, sv, ss, sk = stats.describe(x)
# NOTE: the sample size goes through %f, so it is printed as a float (e.g. 20.000000).
sstr = '%-14s mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f, size = %f'
print(sstr % ('sample:', sm, sv, ss, sk, n))
tt = (sm - 173) / np.sqrt(sv / n)  # t-statistic for H0: mean == 173
pval = 2 * stats.t.sf(abs(tt), df=n - 1)  # two-sided p-value = P(|T| > |tt|)
print('t-statistic = %6.6f pvalue = %6.6f' % (tt, pval))

sample:        mean = 171.1435, variance = 23.0414, skew = 0.0227, kurtosis = -0.5445, size = 20.000000
t-statistic = -1.729634 pvalue = 0.099909


In [57]:
# Another approach: the same one-sample test wrapped in a reusable function.
def t_test(x, m):
    """Two-sided one-sample t-test of the mean of ``x`` against ``m``.

    Parameters
    ----------
    x : array-like
        Sample observations. Converted with ``np.asarray``; all elements are
        treated as one sample (``size``, ``mean`` and ``var`` are taken over
        the whole array).
    m : float
        Hypothesised population mean (mu) under H0.

    Returns
    -------
    tuple
        ``(t_statistic, pvalue)`` where
        ``t_statistic = (mean(x) - m) / sqrt(var(x, ddof=1) / n)`` and
        ``pvalue`` is the two-sided tail probability of a Student t
        distribution with ``n - 1`` degrees of freedom.
    """
    x = np.asarray(x)
    n = x.size
    sm = x.mean()          # sample mean
    sv = x.var(ddof=1)     # unbiased sample variance
    t_statistic = (sm - m) / np.sqrt(sv / float(n))

    # Use the survival function instead of ``1 - cdf``: for a large positive
    # t the cdf rounds to exactly 1.0, so ``1 - cdf`` (and the p-value)
    # would collapse to 0 instead of a tiny positive number.
    pvalue = 2 * stats.t.sf(np.abs(t_statistic), n - 1)
    return t_statistic, pvalue

In [58]:
# Run the hand-written test against H0: mean == 173; the returned
# (t_statistic, pvalue) tuple fills the two %-placeholders directly.
print('t-statistic = %6.6f pvalue = %6.6f' % t_test(x, m=173))

t-statistic = -1.729634 pvalue = 0.099909


In [60]:
# Section header for the unpaired (independent two-sample) t-test example.
print("# Unpaired T-test(독립 표본 t-검정)")

# Unpaired T-test(독립 표본 t-검정)


In [61]:
# Two simulated, independent groups of heights drawn from the global NumPy RNG
# (so the values depend on every random call made before this cell).
heights1 = np.random.normal(loc=170, scale=5, size=20)
heights2 = np.random.normal(loc=175, scale=10, size=30)
# NOTE(review): the groups are simulated with different spreads (scale 5 vs 10)
# and different sizes, while ttest_ind pools the variances unless
# equal_var=False (Welch's t-test) is passed — confirm the pooled test is intended.
stats.ttest_ind(heights1, heights2) # H0: the two groups have equal means

Ttest_indResult(statistic=-2.975034508185736, pvalue=0.004575417426774093)

In [62]:
# Section header for the paired (dependent-samples) t-test example.
print("# Paired T-test(대응 표본 t-검정)")

# Paired T-test(대응 표본 t-검정)


In [66]:
# Simulated "before" measurements: 20 draws from N(mean=170, sd=5), taken in one
# vectorised call so `before` is a NumPy array rather than a Python list built
# from scalar draws.
before = np.random.normal(loc=170, scale=5, size=20)
# "after" adds a uniform [0, 1) increase to each subject, so a positive shift
# is built into the paired data.
after = before + np.random.random(size=20)
stats.ttest_rel(before, after)  # H0: no significant difference between the paired samples (no change)

Ttest_relResult(statistic=-9.978756995207744, pvalue=5.446380072416738e-09)

In [67]:
# Section header for running a t-test from summary statistics only.
print("# 통계량으로 T-검정")

# 통계량으로 T-검정


In [68]:
# Test the difference between two groups described only by their
# (mean, standard deviation, number of observations); H0: no difference.
# The default pooled-variance (equal_var=True) form of the test is used.
stats.ttest_ind_from_stats(
    mean1=170, std1=5, nobs1=50,
    mean2=175, std2=10, nobs2=20,
)

Ttest_indResult(statistic=-2.7877282097486784, pvalue=0.006875897429273035)