# <center>第6章 简单统计推断</center>

# 6.2 单总体参数的参数估计

#### 6.2.1.1 单总体均值的参数估计

In [6]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

path=r"D:\文档\Python Scripts\Python数据分析基础（第2版）数据\ch6\moisture.csv"
moisture=pd.read_csv(path)
#Z估计
print(sm.stats.DescrStatsW(moisture).zconfint_mean(alpha=0.05))
#t分布下的估计区间
print(sm.stats.DescrStatsW(moisture).tconfint_mean(alpha=0.05))
#scipy的t分布估计
mean,var,std=stats.bayes_mvs(moisture,alpha=0.95)
print(mean,var,std,sep='\n')

(array([3.85610519]), array([4.09109481]))
(array([3.85313112]), array([4.09406888]))
Mean(statistic=3.9736000000000002, minmax=(3.853131123764977, 4.094068876235023))
Variance(statistic=0.18733089361702127, minmax=(0.12538093683821308, 0.2790231439977582))
Std_dev(statistic=0.43052145521911656, minmax=(0.35409170681931124, 0.5282264135744805))


#### 6.2.1.2 单总体方差、标准差的参数估计

In [7]:
#使用scipy.stats的mvsdist函数
m,v,s=stats.mvsdist(moisture)
print(m.interval(0.95))#返回95%置信度下总体均值
print(m.std())#返回均值标准差
print(v.interval(0.95))#总体方差
print(s.interval(0.95))#总体均值


(3.853131123764977, 4.094068876235023)
(0.12538093683821308, 0.2790231439977582)
(0.35409170681931124, 0.5282264135744805)


#### 6.2.1.1 单总体比例的参数估计

In [9]:
#比如说产品的合格率问题

sm.stats.proportion_confint(95,100,alpha=0.05,method='normal')

(0.9072835752920528, 0.9927164247079471)

### 6.2.2 单总体参数的假设检验

#### 6.2.2.1 总体均值的假设检验

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

path=r"D:\文档\Python Scripts\Python数据分析基础（第2版）数据\ch6\moisture.csv"
moisture=pd.read_csv(path)

#总体方差已知，用Z统计量，返回z统计量和p值
#alternative可选参数为：'two-sided','larger','smaller'
print(sm.stats.DescrStatsW(moisture).ztest_mean(value=4,alternative='larger'))

#总体方差未知,使用t统计量
print(sm.statas.DescrStatsW(moisture).ttest_mean(value=4,alternative='larger'))

(array([-0.44038583]), array([0.67017116]))

#### 6.2.2.2 总体比例的假设检验

In [7]:
#产品合格率的检验
print(stats.binom_test(95,100,p=0.97,alternative='greater'))#返回p值
print(sm.stats.binom_test(95,100,prop=0.97,alternative='larger'))
print(sm.stats.proportions_ztest(95,100,value=0.97,alternative='larger'))#返回z统计量和p值

0.9191628710986264
0.9191628710986264
(-0.9176629354822475, 0.8206023210565294)


## 6.3 两总体参数的假设检验

>主要考察两个总体的参数是否有差异

### 6.3.1 独立样本的假设检验

#### 6.3.1.1 独立样本均值的假设检验

In [15]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm

path=r"D:\文档\Python Scripts\Python数据分析基础（第2版）数据\ch6\battery.csv"
data=pd.read_csv(path)
print(data.head())

#进行独立样本均值的t检验，需要实现对两样本总体方差是否相等进行检验
print(stats.bartlett(data[data['tech']==1]['Endurance'],data[data['tech']==2]['Endurance']))#根据结果可以认为总体方差相等

#均值检验
print(stats.ttest_ind(data[data['tech']==1]['Endurance'],data[data['tech']==2]['Endurance'],equal_var=True))
print(sm.stats.ttest_ind(data[data['tech']==1]['Endurance'],data[data['tech']==2]['Endurance'],alternative='two-sided',usevar='pooled',value=0))

   Endurance  tech
0        4.1     1
1        3.7     1
2        3.5     1
3        3.9     1
4        4.1     1
BartlettResult(statistic=3.3228777945188592, pvalue=0.06832213694213818)
Ttest_indResult(statistic=-2.9908265619140626, pvalue=0.0038722567339729993)
(-2.9908265619140626, 0.0038722567339729993, 68.0)


#### 6.3.1.2 独立样本比例之差的假设检验

In [20]:
path=r"D:\文档\Python Scripts\Python数据分析基础（第2版）数据\ch6\magzine.csv"
magzine=pd.read_csv(path)
print(magzine.head())
magzine['name']=magzine['name'].astype('category')
magzine['name'].cat.categories=['Fashion','Cosmetic']
magzine['gender']=magzine['gender'].astype('category')
magzine['gender'].cat.categories=['Male','Female']
female=magzine[magzine['gender']=='Female']['name'].value_counts()
print(female)
name=magzine['name'].value_counts()
sm.stats.proportions_ztest(female,name,value=0.3,alternative='smaller',prop_var=False)

   name  gender
0     1       1
1     1       2
2     1       1
3     1       1
4     1       1
Cosmetic    35
Fashion     16
Name: name, dtype: int64


(-0.0893894201435671, 0.4643862156571413)

### 6.3.2 成对样本的假设检验

In [23]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm


path=r"D:\文档\Python Scripts\Python数据分析基础（第2版）数据\ch6\happiness.csv"
data=pd.read_csv(path)
print(data.head())
#原假设是H0：u1-u2>=0即居民幸福指数没有提高
stats.ttest_rel(data['Year2015'],data['Year2016'])

   Year2015  Year2016
0     69.48     77.44
1     82.51     67.49
2     82.12     64.56
3     70.32     70.14
4     75.29     74.72


Ttest_relResult(statistic=-0.45945807951277384, pvalue=0.6464067663555169)