In [1]:
import pandas as pd 

In [None]:
# Columns analysed by the per-feature loops in the cells below.
features = [
    'AGE',
    'PTEDUCAT',
    'CDRSB',
    'ADAS11',
    'ADAS13',
    'ADASQ4',
    'MMSE',
    'RAVLTimmediate',
    'RAVLTlearning',
    'RAVLTforgetting',
    'RAVLTpercforgetting',
    'LDELTOTAL',
    'TRABSCOR',
    'FAQ',
]

# Load the study spreadsheet; the later cells also read its DX, PTGENDER
# and PTMARRY columns.
df = pd.read_excel("./adni589-eur.xlsx")

#### 正态性检验

In [None]:
import statsmodels.api as sm #导入statsmodels.api包
import pylab

## Q-Q plots of AGE, one figure per diagnosis group ##
# The three figures are drawn in the order CN, MCI, AD. Each one is titled
# with its group so the plots can be told apart, and missing values are
# dropped before plotting.
for dx_label in ('CN', 'MCI', 'AD'):
    age_sample = df.loc[df['DX'] == dx_label, 'AGE'].dropna()
    sm.qqplot(age_sample, line='s')
    pylab.title(f'Q-Q plot of AGE (DX = {dx_label})')
    pylab.show()  # render this group's figure before starting the next one

In [None]:
from scipy import stats #导入scipy包

## Shapiro-Wilk normality test, per feature and per diagnosis group ##
for feat in features:
    print("正态性检验指标：", feat)
    # Results are computed and printed in the order CN, MCI, AD.
    # Missing values are dropped first: a NaN in the sample would otherwise
    # propagate into the test statistic and p-value.
    shapiro_test1, shapiro_test2, shapiro_test3 = [
        stats.shapiro(df.loc[df['DX'] == dx_label, feat].dropna())
        for dx_label in ('CN', 'MCI', 'AD')
    ]
    print(shapiro_test1)
    print(shapiro_test2)
    print(shapiro_test3)

#### 方差齐性检验

In [None]:
## Per-group mean and standard deviation ##
# Aggregate numeric columns only. The frame also carries categorical
# columns (PTGENDER, PTMARRY - presumably strings), and with pandas >= 2.0
# GroupBy.mean()/std() raise a TypeError on non-numeric columns instead of
# silently dropping them.
numeric_cols = [
    col for col in df.select_dtypes(include='number').columns if col != 'DX'
]
grouped_numeric = df.groupby('DX')[numeric_cols]

## Mean of every numeric column within each DX group ##
df_mean = grouped_numeric.mean()
## Standard deviation of every numeric column within each DX group ##
df_std = grouped_numeric.std()

In [None]:
## Levene test for homogeneity of variance across the three DX groups ##
for feat in features:
    print("方差齐性检验指标：", feat)
    # One sample per diagnosis group (CN, MCI, AD), selected with a single
    # .loc call instead of chained indexing. Missing values are dropped so
    # they cannot turn the statistic into NaN.
    group0, group1, group2 = (
        df.loc[df['DX'] == dx_label, feat].dropna()
        for dx_label in ('CN', 'MCI', 'AD')
    )
    # center='mean' is kept from the original analysis.
    leveneTestRes = stats.levene(group0, group1, group2, center='mean')
    print(leveneTestRes)  # show the test result for this feature

#### 非正态分布（Kruskal-Wallis 检验与 Dunn's 事后检验）

In [None]:
import numpy as np
import scipy.stats as stats
import scikit_posthocs as sp

# Features whose Kruskal-Wallis p-value is below 0.05.
xianzhu = []
# Feature name -> matrix of pairwise Dunn p-values (Bonferroni-adjusted),
# filled only for the features collected in `xianzhu`.
dunn_results = {}

for feat in features:
    print("检验指标：", feat)
    # One sample per diagnosis group; missing values are dropped so they do
    # not propagate into the test statistic.
    group1 = df.loc[df['DX'] == 'CN', feat].dropna()   # first group
    group2 = df.loc[df['DX'] == 'MCI', feat].dropna()  # second group
    group3 = df.loc[df['DX'] == 'AD', feat].dropna()   # third group

    # Kruskal-Wallis omnibus test across the three groups
    stat, p = stats.kruskal(group1, group2, group3)

    print('Kruskal-Wallis test:')
    # Report both values with four significant digits
    print(f'Statistic = {stat:.4g}, p-value = {p:.4g}')

    if p < 0.05:
        xianzhu.append(feat)

        # Dunn's post-hoc test identifies which pairs of groups differ.
        # It is run on the same observations as the omnibus test: rows of
        # the three groups with a non-missing value for this feature.
        dunn_input = df.loc[
            df['DX'].isin(['CN', 'MCI', 'AD']), ['DX', feat]
        ].dropna()
        dunn_results[feat] = sp.posthoc_dunn(
            dunn_input, val_col=feat, group_col='DX', p_adjust='bonferroni'
        )
        print("Dunn's post-hoc test (Bonferroni-adjusted p-values):")
        print(dunn_results[feat])


#### 卡方检验

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# Cross-tabulate gender (rows) against diagnosis group (columns)
contingency_table = pd.crosstab(index=df['PTGENDER'], columns=df['DX'])

# Show the observed counts
print("Contingency Table:", contingency_table, sep="\n")

# Chi-squared test of independence on the observed counts
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Report the statistic, degrees of freedom and expected counts
print(
    f'\nChi-squared Statistic = {chi2:.4g}, p-value = {p:.4g}',
    f'Degrees of freedom = {dof}',
    'Expected frequencies:',
    expected,
    sep="\n",
)


#### Fisher 精确检验（当前以卡方检验代替）

In [None]:
from scipy.stats import fisher_exact

# Cross-tabulate marital status (rows) against diagnosis group (columns)
contingency_table = pd.crosstab(df['PTMARRY'], df['DX'])

# Show the observed counts
print("Contingency Table:")
print(contingency_table)

# NOTE(review): this section is headed Fisher's exact test, but the result
# reported here is a chi-squared test of independence on the full table.
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Report the statistic, degrees of freedom and expected counts
print(f'\nChi-squared Statistic = {chi2:.4g}, p-value = {p:.4g}')
print(f'Degrees of freedom = {dof}')
print('Expected frequencies:')
print(expected)

# The chi-squared approximation is commonly considered unreliable when any
# expected count is below 1 or more than 20% of them are below 5; flag that
# case so the p-value above is not taken at face value.
low_expected_share = (expected < 5).mean()
if expected.min() < 1 or low_expected_share > 0.2:
    print(
        f'Warning: {low_expected_share:.0%} of expected frequencies are below 5 '
        f'(minimum {expected.min():.3g}); the chi-squared approximation may be '
        'unreliable for this table.'
    )

In [None]:
# Median and interquartile range of every feature within each DX group
for feat in features:
    print(f"\n检验指标：{feat}")

    # Groups are visited in the order their labels first appear in DX
    for dx_label in df['DX'].unique():
        values = df.loc[df['DX'] == dx_label, feat]

        # Central value, then lower and upper quartiles
        centre = values.median()
        lower_q, upper_q = values.quantile(0.25), values.quantile(0.75)

        # One line per group: "<group>: <median>(<Q1>-<Q3>)"
        summary = f'{dx_label}: {centre}({lower_q}-{upper_q})'
        print(summary)