In [1]:
import pandas as pd
data = pd.read_csv("./data/participant_demo_clinical_all.csv")

In [2]:
def create_summary_table(df):
    from scipy import stats
    import pandas as pd

    # Preprocessing
    df.columns = df.columns.str.strip()  # Strip any whitespace from the column names

    # Selecting relevant columns
    selected_columns = ['GROUP', '2.AGE', '1. SEX'] + [col for col in df.columns if col.startswith(('LSAS', 'MOCI', 'BFNE', 'PSWQ', 'Handedness', 'FCV-19S'))]
    df_selected = df[selected_columns]

    # Renaming columns for better readability
    df_selected.rename(columns={'2.AGE': 'Age', '1. SEX': 'Sex'}, inplace=True)

    # Splitting data into experimental (EXP) and control (HC) groups
    exp_group = df_selected[df_selected['GROUP'] == 'EXP']
    hc_group = df_selected[df_selected['GROUP'] == 'HC']

    # Define a function to calculate mean (SD) and p-value
    def mean_sd(series):
        return f"{series.mean():.2f} ({series.std():.2f})"

    def calculate_pvalue(exp_series, hc_series):
        return stats.ttest_ind(exp_series, hc_series, nan_policy='omit').pvalue

    # Create a summary table
    summary_table = pd.DataFrame(columns=['Measure', 'EXP', 'HC', 'p-value'])

    for column in df_selected.columns[2:]:  # Exclude 'GROUP', 'Age', 'Sex' columns
        exp_mean_sd = mean_sd(exp_group[column])
        hc_mean_sd = mean_sd(hc_group[column])
        p_value = calculate_pvalue(exp_group[column], hc_group[column])
        
        summary_table = pd.concat([summary_table, pd.DataFrame([{'Measure': column, 'EXP': exp_mean_sd, 'HC': hc_mean_sd, 'p-value': p_value}])], ignore_index=True)

    # Adding Age and Sex columns separately
    summary_table = pd.concat([summary_table, pd.DataFrame([{
        'Measure': 'Age',
        'EXP': mean_sd(exp_group['Age']),
        'HC': mean_sd(hc_group['Age']),
        'p-value': calculate_pvalue(exp_group['Age'], hc_group['Age'])
    }])], ignore_index=True)

    # Sex should be handled as a percentage of females
    exp_female_pct = (exp_group['Sex'].value_counts(normalize=True).get(2, 0)) * 100
    hc_female_pct = (hc_group['Sex'].value_counts(normalize=True).get(2, 0)) * 100

    summary_table = pd.concat([summary_table, pd.DataFrame([{
        'Measure': 'Sex (Female %)',
        'EXP': f"{exp_female_pct:.2f}%",
        'HC': f"{hc_female_pct:.2f}%",
        'p-value': calculate_pvalue(exp_group['Sex'] == 2, hc_group['Sex'] == 2)
    }])], ignore_index=True)

    return summary_table

# Using the function to create the summary table
summary_df = create_summary_table(data)
summary_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected.rename(columns={'2.AGE': 'Age', '1. SEX': 'Sex'}, inplace=True)


Unnamed: 0,Measure,EXP,HC,p-value
0,Sex,1.46 (0.50),1.50 (0.50),0.5733906
1,LSAS_performance,42.88 (11.84),12.40 (8.75),3.7847240000000004e-39
2,LSAS_social_interaction,37.70 (11.85),10.53 (7.91),2.235185e-35
3,LSAS,80.58 (22.55),22.93 (16.01),2.237433e-39
4,MOCI,16.30 (5.67),20.96 (8.02),4.330711e-05
5,MOCI_checking,4.56 (2.26),6.36 (2.53),6.793133e-06
6,MOCI_cleaning,6.79 (3.39),8.30 (3.81),0.009945855
7,MOCI_doubting,3.31 (2.11),4.00 (1.74),0.03019463
8,MOCI_slowness,3.65 (1.81),5.39 (2.32),6.39771e-07
9,BFNE,50.85 (5.58),33.03 (8.29),4.988795e-34


In [11]:
summary_df.to_csv("./data/summary_all.csv")

In [10]:
print(data.groupby("GROUP").agg(['mean', 'std']))

from IPython.display import display, HTML

# DataFrame을 HTML로 변환하여 출력
display(HTML(data.groupby("GROUP").agg(['mean', 'std']).to_html()))

      Unnamed: 0                Exp No.                1. SEX            \
            mean        std        mean         std      mean       std   
GROUP                                                                     
EXP    88.788889  46.345671  261.755556  184.895434  1.455556  0.500811   
HC     72.635135  47.657906  211.040541  181.308595  1.500000  0.503413   

           2.AGE           3-2. YR_EDU            ... MOCI_slowness            \
            mean       std        mean       std  ...          mean       std   
GROUP                                             ...                           
EXP    28.866667  7.519563   14.812500  2.571185  ...      3.654762  1.813564   
HC     29.932432  7.192711   15.390411  2.415584  ...      5.385714  2.317500   

            BFNE                 PSWQ           Handedness(true)            \
            mean       std       mean       std             mean       std   
GROUP                                                         

  print(data.groupby("GROUP").agg(['mean', 'std']))
  display(HTML(data.groupby("GROUP").agg(['mean', 'std']).to_html()))


Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Exp No.,Exp No.,1. SEX,1. SEX,2.AGE,2.AGE,3-2. YR_EDU,3-2. YR_EDU,Screening #,Screening #,STAI-X-1,STAI-X-1,STAI-X-2,STAI-X-2,HADS_anxiety,HADS_anxiety,HADS_depression,HADS_depression,SWLS,SWLS,GAD-7,GAD-7,PDSS,PDSS,LSAS_performance,LSAS_performance,LSAS_social_interaction,LSAS_social_interaction,LSAS,LSAS,MOCI,MOCI,MOCI_checking,MOCI_checking,MOCI_cleaning,MOCI_cleaning,MOCI_doubting,MOCI_doubting,MOCI_slowness,MOCI_slowness,BFNE,BFNE,PSWQ,PSWQ,Handedness(true),Handedness(true),FCV-19S,FCV-19S
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
GROUP,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2
EXP,88.788889,46.345671,261.755556,184.895434,1.455556,0.500811,28.866667,7.519563,14.8125,2.571185,89.606742,46.601214,59.988235,10.482973,60.411765,10.960203,10.952381,2.903694,9.166667,2.714678,7.5,5.704574,16.738095,5.309957,6.25,5.890313,42.880952,11.837662,37.702381,11.854316,80.583333,22.551258,16.297619,5.669151,4.559524,2.256723,6.785714,3.386975,3.309524,2.105507,3.654762,1.813564,50.845238,5.582878,57.831325,8.933111,43.470588,6.076082,5.535714,5.128344
HC,72.635135,47.657906,211.040541,181.308595,1.5,0.503413,29.932432,7.192711,15.390411,2.415584,73.716216,47.733714,33.671429,6.753896,33.742857,6.686967,12.385714,4.706742,7.028571,2.007027,16.957143,5.575401,8.028571,1.273719,0.114286,0.400827,12.4,8.74999,10.528571,7.912055,22.928571,16.012062,20.957143,8.018883,6.357143,2.52544,8.3,3.808267,4.0,1.744557,5.385714,2.3175,33.028571,8.289982,34.671429,9.051738,42.985714,5.417344,2.928571,3.548102
