[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert_v2/blob/main/part3/ch1/ch1_hypothesis_testing.ipynb)

# 2. 단일 표본검정

### 1. 기본학습

In [1]:
import pandas as pd

# Sample of 30 weight measurements held in a single 'weights' column.
df = pd.DataFrame(
    {
        'weights': [
            122, 121, 120, 119, 125, 115, 121, 118, 117, 127,
            123, 129, 119, 124, 114, 126, 122, 124, 121, 116,
            120, 123, 127, 118, 122, 117, 124, 125, 123, 121,
        ],
    }
)

In [2]:
from scipy import stats

# One-sample t-test of the 'weights' column against a hypothesized
# population mean of 120; the result unpacks into (statistic, p-value).
t_statistic, p_value = stats.ttest_1samp(
    df['weights'],
    popmean=120,
)
print('t-statistic:', t_statistic)
print('p-value:', p_value)

t-statistic: 2.1155384372682344
p-value: 0.043092957066609296


In [3]:
# Same one-sample t-test, but print the whole result object instead of
# unpacking it.
print(
    stats.ttest_1samp(
        df['weights'],
        popmean=120,
    )
)

TtestResult(statistic=2.1155384372682344, pvalue=0.043092957066609296, df=29)


In [4]:
import scipy

# Show the installed SciPy version; it should be 1.10.1 or later.
installed_version = scipy.__version__
print(installed_version)

1.14.1


In [5]:
# Two-sided alternative stated explicitly: H1 is that the population mean
# differs from 120.
print(
    stats.ttest_1samp(
        df['weights'],
        popmean=120,
        alternative='two-sided',
    )
)

TtestResult(statistic=2.1155384372682344, pvalue=0.043092957066609296, df=29)


In [6]:
# One-sided alternative: H1 is that the population mean is greater than 120.
print(
    stats.ttest_1samp(
        df['weights'],
        popmean=120,
        alternative='greater',
    )
)

TtestResult(statistic=2.1155384372682344, pvalue=0.021546478533304648, df=29)


In [7]:
# One-sided alternative: H1 is that the population mean is less than 120.
print(
    stats.ttest_1samp(
        df['weights'],
        popmean=120,
        alternative='less',
    )
)

TtestResult(statistic=2.1155384372682344, pvalue=0.9784535214666953, df=29)


### 2. 심화학습

In [8]:
import pandas as pd

# Ten weight measurements in a single 'weights' column.
df = pd.DataFrame(
    {
        'weights': [
            125, 126, 118, 124, 117,
            127, 123, 122, 119, 142,
        ],
    }
)

In [9]:
from scipy import stats

# Shapiro-Wilk normality test on the sample. The bare name on the last line
# keeps the result as the cell's displayed value.
normality = stats.shapiro(df['weights'])
normality

ShapiroResult(statistic=0.8164570347000635, pvalue=0.022960129822451016)

In [10]:
# Run the Wilcoxon signed-rank test on the deviations from 120, with the
# one-sided 'less' alternative.
centered = df['weights'] - 120
stats.wilcoxon(centered, alternative='less')

WilcoxonResult(statistic=47.0, pvalue=0.9814453125)

# 3. 대응 표본검정

### 1. 기본학습

In [11]:
import pandas as pd

# Eight paired observations: one 'before' and one 'after' value per row.
df = pd.DataFrame(
    {
        'before': [85, 90, 92, 88, 86, 89, 83, 87],
        'after': [85.5, 89.9, 92.6, 89.5, 85.8, 88.8, 84.6, 87.8],
    }
)

In [12]:
from scipy import stats

# Paired t-test, one-sided: H1 is that the 'before' mean is less than the
# 'after' mean.
print(
    stats.ttest_rel(
        df['before'],
        df['after'],
        alternative='less',
    )
)

TtestResult(statistic=-2.2127749675452324, pvalue=0.03127028733756238, df=7)


In [13]:
# Same paired comparison with the samples swapped, so the one-sided
# alternative becomes 'greater' ('after' mean above 'before' mean).
print(
    stats.ttest_rel(
        df['after'],
        df['before'],
        alternative='greater',
    )
)

TtestResult(statistic=2.2127749675452324, pvalue=0.03127028733756238, df=7)


In [14]:
# Paired t-test with no `alternative` given, i.e. SciPy's default
# two-sided test.
print(
    stats.ttest_rel(
        df['after'],
        df['before'],
    )
)

TtestResult(statistic=2.2127749675452324, pvalue=0.06254057467512476, df=7)


### 2. 심화학습

In [15]:
import pandas as pd

# Eight paired observations with integer 'before' and 'after' values.
df = pd.DataFrame(
    {
        'before': [85, 90, 92, 88, 86, 89, 83, 87],
        'after': [86, 92, 94, 89, 84, 90, 84, 88],
    }
)

In [16]:
from scipy import stats

# Store the paired differences (after minus before) as a new column, then
# run the Shapiro-Wilk normality test on them.
df['diff'] = df['after'].sub(df['before'])
stats.shapiro(df['diff'])

ShapiroResult(statistic=0.6886147375920879, pvalue=0.0016734051223900109)

In [17]:
# Wilcoxon signed-rank test on the two paired samples, with the one-sided
# 'greater' alternative.
stats.wilcoxon(
    df['after'],
    df['before'],
    alternative='greater',
)

WilcoxonResult(statistic=29.0, pvalue=0.07421875)

In [18]:
# Same Wilcoxon signed-rank test, this time given the precomputed
# difference column.
stats.wilcoxon(
    df['diff'],
    alternative='greater',
)

WilcoxonResult(statistic=29.0, pvalue=0.07421875)

# 4. 독립 표본검정

### 1. 기본학습

In [19]:
import pandas as pd

# Two independent samples of unequal size (8 and 5 observations).
class1 = [
    85, 90, 92, 88,
    86, 89, 83, 87,
]
class2 = [
    80, 82, 88, 85, 84,
]

In [20]:
from scipy import stats

# Independent two-sample t-test with SciPy's defaults (two-sided, equal
# variances assumed).
print(
    stats.ttest_ind(
        class1,
        class2,
    )
)

TtestResult(statistic=2.2108140580092237, pvalue=0.04914857789252186, df=11.0)


In [21]:
# Welch's t-test: the equal-variance assumption is dropped.
print(
    stats.ttest_ind(
        class1,
        class2,
        equal_var=False,
    )
)

TtestResult(statistic=2.1818699281825236, pvalue=0.059589330071355334, df=8.272682358753572)


In [22]:
# Equal-variance t-test, one-sided: H1 is that the class1 mean is less than
# the class2 mean.
print(
    stats.ttest_ind(
        class1,
        class2,
        equal_var=True,
        alternative='less',
    )
)

TtestResult(statistic=2.2108140580092237, pvalue=0.9754257110537391, df=11.0)


In [23]:
# Equal-variance t-test, one-sided: H1 is that the class1 mean is greater
# than the class2 mean.
print(
    stats.ttest_ind(
        class1,
        class2,
        equal_var=True,
        alternative='greater',
    )
)

TtestResult(statistic=2.2108140580092237, pvalue=0.02457428894626093, df=11.0)


### 2. 심화학습

In [24]:
import pandas as pd

from scipy import stats

# Two independent samples of unequal size (8 and 5 observations).
class1 = [85, 90, 92, 88, 86, 89, 83, 87]
class2 = [80, 82, 88, 85, 84]

# Shapiro-Wilk normality test, one sample at a time.
for sample in (class1, class2):
    print(stats.shapiro(sample))

ShapiroResult(statistic=0.9981893537736595, pvalue=0.999986994137081)
ShapiroResult(statistic=0.9917398436295009, pvalue=0.9854182266624983)


In [25]:
# Levene test for equality of variances between the two samples.
print(
    stats.levene(
        class1,
        class2,
    )
)

LeveneResult(statistic=0.0027925869510027727, pvalue=0.958802951766629)


In [26]:
# Equal-variance t-test, one-sided: H1 is that the class1 mean is less than
# the class2 mean.
print(
    stats.ttest_ind(
        class1,
        class2,
        equal_var=True,
        alternative='less',
    )
)

TtestResult(statistic=2.2108140580092237, pvalue=0.9754257110537391, df=11.0)


In [27]:
import pandas as pd

from scipy import stats

# Same class1 as before; class2 now ends with the much larger value 130.
class1 = [85, 90, 92, 88, 86, 89, 83, 87]
class2 = [80, 82, 88, 85, 130]

# Shapiro-Wilk normality test, one sample at a time.
for sample in (class1, class2):
    print(stats.shapiro(sample))

ShapiroResult(statistic=0.9981893537736595, pvalue=0.999986994137081)
ShapiroResult(statistic=0.6880497349322277, pvalue=0.007151570728885509)


In [28]:
# Mann-Whitney U rank test on the two independent samples, with the
# one-sided 'less' alternative.
stats.mannwhitneyu(
    class1,
    class2,
    alternative='less',
)

MannwhitneyuResult(statistic=26.0, pvalue=0.8299904236851448)