In [1]:
import pandas as pd 
import numpy as np
from scipy.stats import f

In [2]:
# Two-factor analysis of variance (ANOVA) for balanced designs
def anova_2_factor(datas,factor_names=('A','B'),repeat_type=1):
    """
    Build the two-factor ANOVA table for a balanced design.

    parameters:
        datas: array-like of numbers
            repeat_type=1 -- shape (r, s, t): r levels of the first factor,
                             s levels of the second factor and t >= 2
                             observations in every cell.
            repeat_type=0 -- shape (r, s): one observation per cell.
        factor_names: the labels of the two factors; only the first two
                      entries are used. default=('A','B')
        repeat_type: 0 -- no replication (datas is r*s),
                     1 -- equal replication (datas is r*s*t),
                     default: 1
    return:
        A DataFrame with the columns '平方和' (sum of squares), '自由度'
        (degrees of freedom), '均方' (mean square), 'F比' (F ratio) and
        'P值' (p-value). The rows are the two factors, their interaction
        (name_a+'X'+name_b, only with replication), 'E' (error) and
        'T' (total). Entries that are not defined for a row are NaN.
    raises:
        ValueError: repeat_type is not 0 or 1, datas is ragged or does not
                    have the dimensionality required by repeat_type, a
                    factor has fewer than two levels, a replicated design
                    has fewer than two observations per cell, or the row
                    labels would not be unique.
    """
    if repeat_type not in (0,1):
        raise ValueError("repeat_type must be 0 (no replication) or "
                         "1 (equal replication), got %r"%(repeat_type,))
    name_a,name_b=factor_names[0],factor_names[1]
    # A ragged nested list makes numpy raise ValueError here.
    values=np.asarray(datas,dtype=float)
    if repeat_type==0:
        if values.ndim!=2:
            raise ValueError("repeat_type=0 expects 2-D data (r*s), got "
                             "%d dimension(s)"%values.ndim)
        # Treat every observation as a cell with a single replicate so that
        # both designs share the same formulas.
        values=values[:,:,np.newaxis]
    elif values.ndim!=3:
        raise ValueError("repeat_type=1 expects 3-D data (r*s*t), got "
                         "%d dimension(s)"%values.ndim)
    r,s,t=values.shape
    if r<2 or s<2:
        raise ValueError("each factor needs at least two levels")
    if repeat_type==1 and t<2:
        raise ValueError("repeat_type=1 needs at least two observations "
                         "per cell; use repeat_type=0 for unreplicated data")
    # Grand mean, cell means and the level means of each factor
    grand_mean=values.mean()
    cell_means=values.mean(axis=2)
    a_means=cell_means.mean(axis=1)
    b_means=cell_means.mean(axis=0)
    # Total variation and the main-effect sums of squares
    ST=np.sum((values-grand_mean)**2)
    SA=s*t*np.sum((a_means-grand_mean)**2)
    SB=r*t*np.sum((b_means-grand_mean)**2)
    # What is left of the cell means after removing both main effects.
    # Summing its squares directly (instead of ST-SE-SA-SB) cannot go
    # negative through floating-point cancellation.
    residual=cell_means-a_means[:,np.newaxis]-b_means[np.newaxis,:]+grand_mean
    cell_ss=t*np.sum(residual**2)
    effects=[(name_a,SA,r-1),(name_b,SB,s-1)]
    if repeat_type==1:
        # Within-cell variation is the error; the residual is the interaction.
        SE=np.sum((values-cell_means[:,:,np.newaxis])**2)
        error_dof=r*s*(t-1)
        effects.append((name_a+'X'+name_b,cell_ss,(r-1)*(s-1)))
    else:
        # Without replication the residual itself serves as the error term.
        SE=cell_ss
        error_dof=(r-1)*(s-1)
    mean_square_error=SE/error_dof
    labels=[]
    records=[]
    for label,sum_sq,dof in effects:
        mean_sq=sum_sq/dof
        f_ratio=mean_sq/mean_square_error
        labels.append(label)
        # Upper-tail probability of the F(dof, error_dof) distribution
        records.append([sum_sq,dof,mean_sq,f_ratio,f.sf(f_ratio,dof,error_dof)])
    labels.append('E')
    records.append([SE,error_dof,mean_square_error,np.nan,np.nan])
    labels.append('T')
    records.append([ST,r*s*t-1,np.nan,np.nan,np.nan])
    if len(set(labels))!=len(labels):
        raise ValueError("row labels must be unique; factor names must "
                         "differ from each other and from 'E' and 'T'")
    # Assemble the whole table in one step rather than growing it row by row.
    return pd.DataFrame(records,index=labels,
                        columns=['平方和','自由度','均方','F比','P值'],
                        dtype=float)

In [3]:
# Example 21.4: two-factor ANOVA with equal replication (4 x 3 design, 2 observations per cell)

In [4]:
# With equal replication the observations form a 3-D nested list:
# outer level = first factor, middle level = second factor,
# innermost level = the repeated observations of one cell.
data = [
    [[58.2, 52.6], [56.2, 41.2], [65.3, 60.8]],
    [[49.1, 42.8], [54.1, 50.5], [51.6, 48.4]],
    [[60.1, 58.3], [70.9, 73.2], [39.2, 40.7]],
    [[75.8, 71.5], [58.2, 51.0], [48.7, 41.4]],
]
# Run the analysis; the returned ANOVA table is displayed as the cell output.
anova_2_factor(data)

Unnamed: 0,平方和,自由度,均方,F比,P值
A,261.675,3.0,87.225,4.417388,0.025969
B,370.980833,2.0,185.490417,9.393902,0.003506
AXB,1768.6925,6.0,294.782083,14.928825,6.2e-05
E,236.95,12.0,19.745833,,
T,2638.298333,23.0,,,


In [5]:
# Example 21.5: two-factor ANOVA with equal replication (2 x 2 design, 2 observations per cell)

In [6]:
# 2 x 2 design with two observations in every cell (default repeat_type=1).
x = [
    [[38, 38.6], [47, 44.8]],
    [[45, 43.8], [42.4, 40.8]],
]
anova_2_factor(x)

Unnamed: 0,平方和,自由度,均方,F比,P值
A,1.62,1.0,1.62,1.408696,0.300945
B,11.52,1.0,11.52,10.017391,0.03402
AXB,54.08,1.0,54.08,47.026087,0.002367
E,4.6,4.0,1.15,,
T,71.82,7.0,,,


In [7]:
# Example 21.6: two-factor ANOVA without replication (4 x 5 design)

In [8]:
# Without replication the observations form a 2-D nested list:
# one row per level of the first factor, one column per level of the second.
data = [
    [76, 67, 81, 56, 51],
    [82, 69, 96, 59, 70],
    [68, 59, 67, 54, 42],
    [63, 56, 64, 58, 37],
]
# repeat_type defaults to 1, so it must be set to 0 for unreplicated data.
anova_2_factor(data, factor_names=['时间', '地点'], repeat_type=0)

Unnamed: 0,平方和,自由度,均方,F比,P值
时间,1182.95,3.0,394.316667,10.722411,0.001033
地点,1947.5,4.0,486.875,13.239293,0.000234
E,441.3,12.0,36.775,,
T,3571.75,19.0,,,
