In [2]:
from scipy.stats import ttest_ind
import pandas as pd

# Load the participant demographic / clinical table.
file_path = '/home/changbae/fmri_project/MDMR/notebook/data/participant_demo_clinical.csv'
data = pd.read_csv(file_path)

# Identifier / bookkeeping columns that are not measures to summarise.
exclude_columns = ['HAID ID', 'Exp No.', 'fmri_code', 'Screening #', 'Enrollment #']

# Keep every remaining column, preserving the original column order.
keep_mask = ~data.columns.isin(exclude_columns)
columns_to_include = data.columns[keep_mask].tolist()
filtered_data = data.loc[:, columns_to_include]

# One sub-table per study arm, selected by the value of the GROUP column.
group_labels = filtered_data['GROUP']
exp_group = filtered_data.loc[group_labels.eq('EXP')]
hc_group = filtered_data.loc[group_labels.eq('HC')]

# Build one summary row per measure: per-group mean and sample standard
# deviation (pandas default, ddof=1), plus the p-value of Welch's t-test
# (equal_var=False) comparing the EXP and HC groups.
results = []

for column in columns_to_include:
    if column == 'GROUP':
        # The grouping variable itself is not a measure.
        continue

    # Coerce to numeric before dropping missing values. A direct
    # astype(float) raises ValueError on the first non-numeric entry and
    # aborts the whole table; coerced entries become NaN and are dropped.
    # A column with no numeric data at all therefore yields an all-NaN row.
    exp_values = pd.to_numeric(exp_group[column], errors='coerce').dropna().astype(float)
    hc_values = pd.to_numeric(hc_group[column], errors='coerce').dropna().astype(float)

    mean_exp = exp_values.mean()
    std_exp = exp_values.std()
    mean_hc = hc_values.mean()
    std_hc = hc_values.std()

    # The test statistic is undefined with fewer than two observations in
    # either group; report NaN explicitly instead of calling scipy on
    # degenerate input.
    if len(exp_values) >= 2 and len(hc_values) >= 2:
        t_stat, p_value = ttest_ind(exp_values, hc_values, equal_var=False)
    else:
        t_stat = p_value = float('nan')

    results.append({
        'Measure': column,
        'Mean_EXP': mean_exp,
        'STD_EXP': std_exp,
        'Mean_HC': mean_hc,
        'STD_HC': std_hc,
        'P_value': p_value,
    })

# Convert results to a DataFrame; explicit columns keep the schema stable even
# when no measure produced a row.
results_df = pd.DataFrame(
    results,
    columns=['Measure', 'Mean_EXP', 'STD_EXP', 'Mean_HC', 'STD_HC', 'P_value'],
)