## 정규성 검정

In [2]:
import pandas as pd
from scipy import stats

ds = pd.read_csv("1BD/2sample_t_test.csv")
print(ds.head())

d1 = ds["Mat_A"]
d2 = ds["Mat_B"]

# 정규성 검정
print(stats.shapiro(d1))
print(stats.shapiro(d2))

   Mat_A  Mat_B  Total  Sub
0   73.4   68.7   73.4    1
1   77.0   71.4   77.0    1
2   73.7   69.8   73.7    1
3   73.3   75.3   73.3    1
4   73.1   71.3   73.1    1
ShapiroResult(statistic=nan, pvalue=1.0)
ShapiroResult(statistic=nan, pvalue=1.0)


## 모평균 구간 추정

In [3]:
import pandas as pd
from scipy import stats
import numpy as np

df = pd.DataFrame({"sample" :[18, 18, 20, 21, 20, 23, 19, 18, 17, 21, 22, 20, 20, 21, 20, 19, 19, 18, 17, 19]})
print(df.head())

lower, upper = stats.norm.interval(0.95, loc = np.mean(df), scale=0.397/np.sqrt(20))
print("신뢰구간: ([{0}, {1})".format(lower.round(2), upper.round(2)))

   sample
0      18
1      18
2      20
3      21
4      20
신뢰구간: ([[19.33], [19.67])


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


## 추론 통계

In [6]:
import pandas as pd
from scipy import stats
import numpy as np

df = pd.DataFrame({"sample" :[73, 71, 74, 69, 70, 73, 70, 68, 75, 72, 70, 72, 73, 70, 70, 72, 71, 70, 75, 72]})
lower, upper = stats.t.interval(0.95, len(df)-1,loc = np.mean(df), scale=stats.sem(df))
print("신뢰구간: ([{0}, {1})".format(lower.round(2), upper.round(2)))

신뢰구간: ([[70.6], [72.4])


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


In [7]:
import pandas as pd
from scipy import stats

df = ([74.5, 81.2, 73.8, 82.0, 76.3, 75.7, 80.2, 72.6, 77.9, 82.8])
t_res = stats.ttest_1samp(df, 76.7)

t, p = t_res.statistic.round(3), t_res.statistic.round(3)

print("1-Sample t-test")
print("t 통계량: {}".format(t))
print("p 통계량: {}".format(p))

1-Sample t-test
t 통계량: 0.864
p 통계량: 0.864


### 두 집단 간 평균 같은지 비교

In [11]:
import pandas as pd
from scipy import stats

df = pd.DataFrame({"A":[1.883, 1.715, 1.799, 1.768, 1.711, 1.832, 1.427, 1.344], 
"B":[1.435, 1.572, 1.486, 1.511, 1.457, 1.548, 1.404, 1.883]})

statistic, p = stats.shapiro(df["A"])
print("statistic={}, p-value={}".format(statistic, p))
statistic, p = stats.shapiro(df["B"])
print("statistic={}, p-value={}".format(statistic, p))

stats.bartlett(df["A"], df["B"])

statistic=0.8425754904747009, p-value=0.08004532009363174
statistic=0.7734813094139099, p-value=0.014793775975704193


BartlettResult(statistic=0.4241143652313345, pvalue=0.5148911629500695)

### 짝을 이루는 집단의 평균 차이 있는지 검정

In [15]:
 import pandas as pd
from scipy import stats

df1 = pd.DataFrame({"before":[720, 589, 780, 648, 720, 589, 780, 648, 780, 648]}) 
df2 = pd.DataFrame({"after":[710, 580, 787, 712, 750, 600, 782, 670, 790, 680]})

t_res = stats.ttest_rel(df1, df2)
t, p = t_res.statistic.round(3), t_res.pvalue.round(3)
print("Paired t-test")
print("t: {}".format(t))
print("p: {}".format(p))

Paired t-test
t: [-2.266]
p: [0.05]


### 동전

In [20]:
import pandas as pd
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest

count = 40
n_obs = 100
value = 0.5

stat, pval = proportions_ztest(count, n_obs, value)
print("1 Proportion test")
print("z: {0:0.3f}".format(stat))
print("p: {0:0.3f}".format(pval))

1 Proportion test
z: -2.041
p: 0.041


### 불량품

In [23]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest

count = np.array([14, 5])
nobs = np.array([1200, 1200])

stat, pval = proportions_ztest(count, nobs)

print("2 Proportion test")
print("z: {0:0.3f}".format(stat))
print("p: {0:0.3f}".format(pval))

2 Proportion test
z: 2.073
p: 0.038


## 카이제곱 검정

In [25]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest

df = pd.read_csv("통계data/카이제곱_노트북.csv")

chi, pval, dof, expected = stats.chi2_contingency(df)

print("Chi-square test")
print("chisq: {0:0.3f}".format(chi))
print("p: {0:0.3f}".format(pval))
print("dgree pf freedom: {}".format(dof))
print("expected value: \n{}".format(expected.round(3)))

Chi-square test
chisq: 95.759
p: 0.000
dgree pf freedom: 30
expected value: 
[[10.224  8.43   5.381  3.946  1.973 10.045]
 [ 8.179  6.744  4.305  3.157  1.578  8.036]
 [11.247  9.274  5.919  4.341  2.17  11.049]
 [ 7.157  5.901  3.767  2.762  1.381  7.031]
 [ 6.135  5.058  3.229  2.368  1.184  6.027]
 [ 8.179  6.744  4.305  3.157  1.578  8.036]
 [ 5.879  4.848  3.094  2.269  1.135  5.776]]


## ANOVA

### 쿠키

In [29]:
import pandas as pd
import numpy as np
from scipy import stats

df = pd.read_csv("통계data/correlation.csv")
print(df.head())
corr, pval = stats.pearsonr(df["strength"], df["temp"])
print("Corr Analysis")
print("corr: {0:0.3f}".format(corr))
print("p-value: {0:0.3f}".format(pval))

   strength  temp    Quantity    b_time    Oven_TMP
0      37.6    14  120.790646  0.798922  307.106574
1      38.6    15   99.643155  0.849020  318.178850
2      37.2    14  115.102395  0.816163  339.715122
3      36.4    16  114.640504  0.758266  341.174398
4      38.6    17  141.266536  0.815894  335.904402
Corr Analysis
corr: -0.899
p-value: 0.000
