In [1]:
from setup import general
from setup import stat
from setup import r

import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
from scipy import stats

import rpy2.robjects as robjects # use basic R function


In [2]:
# Restore variables that were saved earlier with IPython's %store magic.
# NOTE(review): presumably persisted by an upstream notebook — confirm it has been run first.
%store -r Pt_included Pt_Demographic Con nreps

# 一般资料比较

In [3]:
# Descriptive statistics for the patient and control groups.
# Later cells read the 'count', 'mean' and 'std' rows of these tables.
gen_pt, gen_con = (group.describe() for group in (Pt_Demographic, Con))

In [4]:
# Compare the continuous demographic variables between controls (Con) and
# patients (Pt_Demographic); one row of test results is collected per variable.
contrast_list = ['Age','Year_of_Education', 'Height','Weight']
result_columns = ['hc_norm_stat','hc_norm_p', 'pt_norm_stat','pt_norm_p','levene_stat', 'levene_p','ind_t','ind_t_p','perm_ind_t_p']

demogrphic_result = []
for col in contrast_list:
    hc_values, pt_values = Con[col], Pt_Demographic[col]

    # Levene's test selects the t-test variant: equal variances are assumed
    # only when its p value exceeds 0.05.
    lev = stats.levene(hc_values, pt_values)
    equal_var = bool(lev[1] > 0.05)
    t_result = stats.ttest_ind(hc_values, pt_values, equal_var=equal_var)

    # Permutation test delegated to the R function 'perm_ind_t'.
    # NOTE(review): the meaning of the third argument (3 + len(col)) is defined
    # on the R side and is not visible here — TODO document it.
    perm_ind_t_p = robjects.r['perm_ind_t'](
        r.to_csv_r(Con, 'hc', col),
        r.to_csv_r(Pt_Demographic, 'pt', col),
        3 + len(col),
        nreps=nreps,
    )

    # Row layout follows result_columns: Shapiro-Wilk (controls, patients),
    # Levene, t-test rounded to 5 decimals, then the permutation p value.
    row = [
        *stats.shapiro(hc_values),
        *stats.shapiro(pt_values),
        *lev,
        round(t_result[0], 5),
        round(t_result[1], 5),
        *perm_ind_t_p,
    ]
    demogrphic_result.append(row)

demogrphic_result_matrix = pd.DataFrame(demogrphic_result, index=contrast_list, columns=result_columns)
demogrphic_result_matrix

Unnamed: 0,hc_norm_stat,hc_norm_p,pt_norm_stat,pt_norm_p,levene_stat,levene_p,ind_t,ind_t_p,perm_ind_t_p
Age,0.960477,0.61086,0.953799,0.428455,0.006229,0.937531,-0.64648,0.52207,0.5136
Year_of_Education,0.883493,0.029898,0.90836,0.059315,6.670882,0.014011,0.81591,0.42284,0.4324
Height,0.897663,0.052324,0.95075,0.378615,0.046001,0.831384,0.31217,0.75671,0.7754
Weight,0.915296,0.106711,0.96366,0.619264,0.053668,0.818111,0.34799,0.72987,0.7442


In [10]:
# Gender distribution: patients vs. controls.
# stat.chi2 hands back the contingency table, the odds ratio and the p value.
gender_matrix, gender_oddsratio, gender_fisher_p = stat.chi2(Con, Pt_Demographic, 'Gender')

# Table layout as consumed here: rows = male / female, columns = patient / control.
Pt_Male, Con_Male = gender_matrix[0][0], gender_matrix[0][1]
Pt_Female, Con_Female = gender_matrix[1][0], gender_matrix[1][1]

gender_oddsratio, gender_fisher_p

[[14, 13], [6, 5]] 38 5
Fisher Exact


(0.8974358974358975, 1.0)

In [11]:
# Dominant hand distribution: patients vs. controls.
# stat.chi2 hands back the contingency table, the odds ratio and the p value.
hand_matrix, hand_oddsratio, hand_fisher_p = stat.chi2(Con, Pt_Demographic, 'Dominant_Hand')

# Table layout as consumed here: rows = right / left, columns = patient / control.
Pt_Right, Con_Right = hand_matrix[0][0], hand_matrix[0][1]
Pt_Left, Con_Left = hand_matrix[1][0], hand_matrix[1][1]

hand_oddsratio, hand_fisher_p

[[19, 18], [1, 0]] 38 0
Fisher Exact


(0.0, 1.0)

<a id="demographic_result"></a>
## 结果汇总

In [13]:
general_info = pd.DataFrame(np.zeros(28).reshape(7,4), index=['Count','Age', 'Gender M/F', 'Height','Weight','Education','Dominant Hand R/L'], columns=['Patient', 'Control', 'Stat Value', 'P Value'])

# First two colomns
general_info.loc['Count', 'Patient'] = round(gen_pt.loc['count','Gender'],0)
general_info.loc['Count', 'Control'] = round(gen_con.loc['count','Gender'],0)
general_info.loc['Gender M/F', 'Patient'] = "{}/{}".format(Pt_Male, Pt_Female)
general_info.loc['Gender M/F', 'Control'] = "{}/{}".format(Con_Male, Con_Female)
general_info.loc['Dominant Hand R/L', 'Patient'] = "{}/{}".format(Pt_Right, Pt_Left)
general_info.loc['Dominant Hand R/L', 'Control'] = "{}/{}".format(Con_Right, Con_Left)
general_info.loc['Height', 'Patient'] = "{} ± {}".format(Pt_Demographic.describe().loc['mean','Height'], round(Pt_Demographic.describe().loc['std','Height'],2))
general_info.loc['Height', 'Control'] = "{} ± {}".format(round(Con.describe().loc['mean','Height'],2), round(Con.describe().loc['std','Height'],2))
general_info.loc['Weight', 'Patient'] = "{} ± {}".format(Pt_Demographic.describe().loc['mean','Weight'], round(Pt_Demographic.describe().loc['std','Weight'],2))
general_info.loc['Weight', 'Control'] = "{} ± {}".format(round(Con.describe().loc['mean','Weight'],2), round(Con.describe().loc['std','Weight'],2))

for v in ['Age','Education']:
    v1 = v
    if v == 'Education':
        v1 = 'Year_of_Education'
    elif v == 'Dominant Hand':
        v1 = 'Dominant_Hand'
    for tp in ['Patient','Control']:
        df_fill = gen_pt
        if tp == 'Control':
            df_fill = gen_con
        general_info.loc[v,tp] = "{}±{}".format(round(df_fill.loc['mean',v1], 2), round(df_fill.loc['std', v1], 2))

# Last two columns
general_info.loc['Count', 'Stat Value'] = r"/"
general_info.loc['Count', 'P Value'] = r"/"         
general_info.loc['Age','Stat Value'] = demogrphic_result_matrix.loc['Age', 'ind_t']
general_info.loc['Age','P Value'] = demogrphic_result_matrix.loc['Age', 'ind_t_p']
general_info.loc['Gender M/F','Stat Value'] = gender_oddsratio
general_info.loc['Gender M/F','P Value'] = gender_fisher_p
general_info.loc['Education','Stat Value'] = demogrphic_result_matrix.loc['Year_of_Education', 'ind_t']
general_info.loc['Education','P Value'] = demogrphic_result_matrix.loc['Year_of_Education', 'perm_ind_t_p']
general_info.loc['Height','Stat Value'] = demogrphic_result_matrix.loc['Height', 'ind_t']
general_info.loc['Height','P Value'] = demogrphic_result_matrix.loc['Height', 'ind_t_p']
general_info.loc['Weight','Stat Value'] = demogrphic_result_matrix.loc['Weight', 'ind_t']
general_info.loc['Weight','P Value'] = demogrphic_result_matrix.loc['Weight', 'ind_t_p']
general_info.loc['Dominant Hand R/L','Stat Value'] = hand_oddsratio
general_info.loc['Dominant Hand R/L','P Value'] = hand_fisher_p
print("采用的方法为两独立样本t检验、费舍确切概率法。正态性检验、方差齐性检验见上")

%store general_info demogrphic_result_matrix
general_info

采用的方法为两独立样本t检验、费舍确切概率法。正态性检验、方差齐性检验见上
Stored 'general_info' (DataFrame)
Stored 'demogrphic_result_matrix' (DataFrame)


Unnamed: 0,Patient,Control,Stat Value,P Value
Count,20.0,18.0,/,/
Age,56.35±9.49,54.28±10.27,-0.64648,0.52207
Gender M/F,14/6,13/5,0.897436,1.0
Height,167.05 ± 6.89,167.83 ± 8.56,0.31217,0.75671
Weight,69.3 ± 12.66,70.75 ± 13.01,0.34799,0.72987
Education,10.95±2.16,11.94±4.75,0.81591,0.4324
Dominant Hand R/L,19/1,18/0,0.0,1.0


# VBM 一般资料比较

In [16]:
# 均为右利手
%store -r vbm
Demo_ptvbm = Pt_Demographic[Pt_Demographic.index.isin(vbm.index)]
Demo_hcvbm = Con[Con.index.isin(vbm.index)]
vbm_demogrphic_result = []
contrast_list = ['Age','Year_of_Education', 'Height','Weight']
for col in contrast_list:
    lev = stats.levene(Demo_hcvbm[col], Demo_ptvbm[col])
    equal_var = True if lev[1]>0.05 else False
    t_result = stats.ttest_ind(Demo_hcvbm[col], Demo_ptvbm[col], equal_var=equal_var)
    
    perm_ind_t_p = robjects.r['perm_ind_t'](r.to_csv_r(Demo_hcvbm, 'hc', col), r.to_csv_r(Demo_ptvbm, 'pt', col), 3 + len(col), nreps=nreps)
    
    res = list(stats.shapiro(Demo_hcvbm[col])+stats.shapiro( Demo_ptvbm[col])+lev+tuple([round(t_result[0],5),round(t_result[1],5)])+tuple(perm_ind_t_p))
    vbm_demogrphic_result.append(res)
demogrphic_result_matrix = pd.DataFrame(vbm_demogrphic_result, index=contrast_list, columns=['hc_norm_stat','hc_norm_p', 'pt_norm_stat','pt_norm_p','levene_stat', 'levene_p','ind_t','ind_t_p','perm_ind_t_p'])
demogrphic_result_matrix

Unnamed: 0,hc_norm_stat,hc_norm_p,pt_norm_stat,pt_norm_p,levene_stat,levene_p,ind_t,ind_t_p,perm_ind_t_p
Age,0.96204,0.670096,0.895039,0.136868,0.05735,0.812542,-0.62604,0.53655,0.523
Year_of_Education,0.8572,0.013836,0.931926,0.400974,1.852721,0.18472,0.24988,0.80457,0.813
Height,0.891186,0.048468,0.943849,0.549485,0.068049,0.796178,0.4016,0.69114,0.717
Weight,0.917516,0.134054,0.948073,0.609008,0.768507,0.388413,0.53233,0.59885,0.5994


In [18]:
def get_range(df, column, decimal=2):
    """
    Format one column of a describe()-style table as 'mean±std'.

    df      -- table indexed by statistic name; must contain 'mean' and 'std' rows
    column  -- label of the column to summarise
    decimal -- number of decimals both values are rounded to (default 2)

    Returns the string '<mean>±<std>'.
    """
    rounded = (round(df.loc[label, column], decimal) for label in ('mean', 'std'))
    return '±'.join(str(value) for value in rounded)

# Summary table for the VBM subset (patients vs. controls).
# Cells mix numbers with formatted strings, so the frame is cast to object dtype
# up front: writing strings into float64 columns relies on implicit upcasting,
# which pandas has deprecated.
general_vbm = pd.DataFrame(
    np.zeros((6, 4)),
    index=['Count','Age', 'Gender M/F', 'Height','Weight','Education'],
    columns=['Patient', 'Control', 'Stat Value', 'P Value'],
).astype(object)
ptvbm = Demo_ptvbm.describe()
hcvbm = Demo_hcvbm.describe()

# Gender contingency table; layout as consumed here: rows = male / female,
# columns = patient / control.
vbm_gender_matrix, vbm_gender_odds, vbm_gender_p = stat.chi2(Demo_hcvbm, Demo_ptvbm, 'Gender')
# NOTE(review): these names are also assigned by the whole-cohort gender cell, so
# after this cell runs they hold the VBM-subset counts.
Pt_Male, Con_Male = vbm_gender_matrix[0][0], vbm_gender_matrix[0][1]
Pt_Female, Con_Female = vbm_gender_matrix[1][0], vbm_gender_matrix[1][1]

# First two columns: group size and mean±std per variable.
# (summary row, column in the describe() tables)
descriptive_rows = (
    ('Age', 'Age'),
    ('Height', 'Height'),
    ('Weight', 'Weight'),
    ('Education', 'Year_of_Education'),
)
for group, desc in (('Patient', ptvbm), ('Control', hcvbm)):
    general_vbm.loc['Count', group] = desc.loc['count', 'Gender']
    for row, source_col in descriptive_rows:
        general_vbm.loc[row, group] = get_range(desc, source_col)
general_vbm.loc['Gender M/F','Patient'] = '{}/{}'.format(Pt_Male, Pt_Female)
general_vbm.loc['Gender M/F','Control'] = '{}/{}'.format(Con_Male, Con_Female)

# Last two columns: test statistic and p value.
general_vbm.loc['Count', 'Stat Value'] = r"/"
general_vbm.loc['Count', 'P Value'] = r"/"

# demogrphic_result_matrix is read as-is; at this point in a top-to-bottom run it
# holds the VBM-subset results assigned by the preceding cell.
# (summary row, row in demogrphic_result_matrix, p-value column to report)
# NOTE(review): Education reports the permutation p value next to the t statistic,
# presumably because normality was rejected for that variable — confirm this is intended.
t_test_rows = (
    ('Age', 'Age', 'ind_t_p'),
    ('Education', 'Year_of_Education', 'perm_ind_t_p'),
    ('Height', 'Height', 'ind_t_p'),
    ('Weight', 'Weight', 'ind_t_p'),
)
for row, source_row, p_col in t_test_rows:
    general_vbm.loc[row, 'Stat Value'] = demogrphic_result_matrix.loc[source_row, 'ind_t']
    general_vbm.loc[row, 'P Value'] = demogrphic_result_matrix.loc[source_row, p_col]
general_vbm.loc['Gender M/F','Stat Value'] = vbm_gender_odds
general_vbm.loc['Gender M/F','P Value'] = vbm_gender_p

print("采用的方法为两独立样本t检验、费舍确切概率法。正态性检验、方差齐性检验见上")
general_vbm

[[7, 12], [5, 5]] 29 5
Fisher Exact
采用的方法为两独立样本t检验、费舍确切概率法。正态性检验、方差齐性检验见上


Unnamed: 0,Patient,Control,Stat Value,P Value
Count,12.0,17.0,/,/
Age,56.83±9.79,54.41±10.57,-0.62604,0.53655
Gender M/F,7/5,12/5,0.583333,0.694153
Height,165.75±7.02,166.88±7.78,0.4016,0.69114
Weight,66.5±14.51,69.03±11.1,0.53233,0.59885
Education,11.33±2.27,11.71±4.78,0.24988,0.813
