### **데이터 준비**

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
%precision 3

'%.3f'

In [3]:
np.random.seed(1111)

In [19]:
# df = {122.02, 131.73, 130.6, 131.82, 132.05, 126.12, 124.43, 132.89, 122.79, 129.95, 126.14, 134.45, 127.64, 125.68}
df = pd.read_csv('ch11_potato.csv')
sample = np.array(df['무게'])
sample

array([122.02, 131.73, 130.6 , 131.82, 132.05, 126.12, 124.43, 132.89,
       122.79, 129.95, 126.14, 134.45, 127.64, 125.68])

In [20]:
s_mean = np.mean(sample)
s_mean

128.4507142857143

In [8]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

In [6]:
path = '/content/drive/MyDrive/'

In [7]:
df = pd.read_csv(path + 'ch11_potato.csv')

FileNotFoundError: File b'/content/drive/MyDrive/ch11_potato.csv' does not exist

In [55]:
sample = np.array(df['무게'])
sample

array([122.02, 131.73, 130.6 , 131.82, 132.05, 126.12, 124.43, 132.89,
       122.79, 129.95, 126.14, 134.45, 127.64, 125.68])

In [44]:
s_mean = np.mean(sample)
s_mean

128.4507142857143

###**가설검증의 기본**

In [46]:
rv = stats.norm(130, np.sqrt(9/14))

In [47]:
rv.isf(0.95)

128.68118313069039

In [49]:
z = (s_mean-130)/np.sqrt(9/14)
z

-1.932298779026813

In [51]:
rv = stats.norm()
rv.isf(0.95)

-1.6448536269514722

In [52]:
rv.cdf(z)

0.026661319523126635

In [53]:
rv.interval(0.95)

(-1.959963984540054, 1.959963984540054)

In [54]:
rv.cdf(z)*2

0.05332263904625327

### **가설검증의 두가지 오류**

In [56]:
rv=stats.norm(130, 3)

In [57]:
c = stats.norm().isf(0.95)
n_samples = 10000
cnt = 0
for _ in range(n_samples):
  sample_ = np.round(rv.rvs(14), 2)
  s_mean_ = np.mean(sample_)
  z = (s_mean_-130)/np.sqrt(9/14)
  if z < c:
    cnt += 1
cnt/n_samples

0.053

In [58]:
rv = stats.norm(128, 3)

In [61]:
c = stats.norm().isf(0.95)
n_samples = 10000
cnt = 0
for _ in range(n_samples):
  sample_ = np.round(rv.rvs(14), 2)
  s_mean_ = np.mean(sample_)
  z = (s_mean_-130)/np.sqrt(9/14)
  if z >= c:
    cnt += 1
cnt/n_samples

0.197

### **가설검정**

**Z-검정**

In [68]:
def pmean_test(sample, mean0, p_var, alpha = 0.05):
  s_mean = np.mean(sample)
  n = len(sample)
  rv = stats.norm()
  interval = rv.interval(1-alpha)

  z=(s_mean-mean0)/np.sqrt(p_var/n)
  if interval[0] <= z <= interval[1]:
    print('귀무가설 채택-- H0 accept')
  else:
    print('귀무가설 기각-- H0 reject')

  if z < 0:
    p = rv.cdf(z) * 2
  else:
    p = (1 - rv.cdf(z)) * 2
  print(f'p값은 {p: .3f}')

In [69]:
pmean_test(sample, 130, 9)

귀무가설 채택-- H0 accept
p값은  0.053


**카이제곱검정**

In [70]:
def pvar_test(sample, var0, alpha = 0.05):
  u_var = np.var(sample, ddof = 1)
  n = len(sample)
  rv = stats.chi2(df = n-1)
  interval = rv.interval(1-alpha)

  y=(n-1)*u_var/var0
  if interval[0] <= y <= interval[1]:
    print('귀무가설 채택-- H0 accept')
  else:
    print('귀무가설 기각-- H0 reject')

  if y < rv.isf(0.5):
    p = rv.cdf(y) * 2
  else:
    p = (1 - rv.cdf(y)) * 2
  print(f'p값은 {p: .3f}')

In [71]:
pvar_test(sample, 9)

귀무가설 채택-- H0 accept
p값은  0.085


**t-검정**

In [72]:
def pmean_test(sample, mean0, alpha = 0.05):
  s_mean = np.mean(sample)
  u_var = np.var(sample, ddof = 1)
  n = len(sample)
  rv = stats.t(df = n-1)
  interval = rv.interval(1-alpha)

  t=(s_mean-mean0)/np.sqrt(u_var/n)
  if interval[0] <= z <= interval[1]:
    print('귀무가설 채택-- H0 accept')
  else:
    print('귀무가설 기각-- H0 reject')

  if z < 0:
    p = rv.cdf(t) * 2
  else:
    p = (1 - rv.cdf(t)) * 2
  print(f'p값은 {p: .3f}')

In [73]:
pmean_test(sample, 130)

귀무가설 채택-- H0 accept
p값은  0.169


In [74]:
t, p = stats.ttest_1samp(sample, 130)
t, p

(-1.4551960206404198, 0.16933464230414275)

END