In [1]:
# import libraries
import os
import pandas as pd
import numpy as np
from scipy import stats

# File paths used by calculate_p_values(); all are relative to the notebook's working directory.
# Demographic CSV loaded as the "non-SIC" group.
# NOTE(review): the file is named demographic_all.csv and flagged as temporary; if it also
# contains the SIC patients, the two groups being compared overlap. Confirm before reporting.
NON_SIC_PATH = "./Sepsis_plt/by_year/demographic/demographic_all.csv" # temporary path
# Demographic CSV loaded as the SIC group.
SIC_PATH = "./Sepsis_plt/by_year/demographic/demographic_SIC_all.csv"
# Folder that receives the generated p-values.csv.
ANALYSIS_PATH = "./data_analysis/"

In [None]:
def _chi_square_p_value(non_sic_series, sic_series, label):
    """Chi-square test of independence between group and one categorical column.

    Args:
        non_sic_series: Column values for the non-SIC group (pandas Series).
        sic_series: The same column for the SIC group (pandas Series).
        label: Heading printed above the contingency table.

    Returns:
        The p-value as a float, or None when the table is empty or one group
        has no non-missing observations (the test is undefined in that case).
    """
    non_sic_count = non_sic_series.value_counts()
    sic_count = sic_series.value_counts()
    # Sort the union of categories: iterating a bare set of strings can change
    # order between interpreter runs, which made the printed table columns
    # irreproducible. key=str keeps mixed label types sortable.
    categories = sorted(set(non_sic_count.index) | set(sic_count.index), key=str)
    contingency = np.array([
        [non_sic_count.get(cat, 0) for cat in categories],
        [sic_count.get(cat, 0) for cat in categories],
    ])
    print(f"{label}:\n", contingency)
    # chi2_contingency raises on an empty table or when a whole row is zero.
    if contingency.size == 0 or (contingency.sum(axis=1) == 0).any():
        print(f"Skipping {label}: at least one group has no observations")
        return None
    _, p_value, _, _ = stats.chi2_contingency(contingency)
    return p_value


def _welch_t_test_p_value(non_sic_series, sic_series):
    """Welch's t-test (unequal variances) on the admission-age column.

    Non-numeric entries are coerced to NaN and dropped before testing.

    Returns:
        The p-value as a float, or None when either group has fewer than two
        numeric values (the test would only produce NaN).
    """
    non_sic_age = pd.to_numeric(non_sic_series, errors='coerce').dropna()
    sic_age = pd.to_numeric(sic_series, errors='coerce').dropna()
    print("Non SIC mean age:", non_sic_age.mean(), "\nSIC mean age:", sic_age.mean())
    if len(non_sic_age) < 2 or len(sic_age) < 2:
        print("Skipping age: each group needs at least two numeric values")
        return None
    _, p_value = stats.ttest_ind(non_sic_age, sic_age, equal_var=False)
    return p_value


def calculate_p_values(non_sic_path=None, sic_path=None, analysis_path=None):
    """Calculate p-values for baseline characteristics of non-SIC vs SIC patients.

    Both CSVs are de-duplicated on 'Reference Key'. For every column that is
    present in both files the matching test is run, its inputs and p-value are
    printed, and the p-value (formatted with two significant decimals in
    scientific notation) is collected:

    * 'Sex', 'Episode Death (Y/N)', '28_day_mortality': chi-square test of
      independence.
    * 'Admission Age (Year) (episode based)': Welch's t-test.

    The collected values are written to '<analysis_path>/p-values.csv' with
    the columns 'Variable' and 'P_Value'.

    NOTE(review): the default NON_SIC_PATH points at demographic_all.csv and is
    marked temporary; if that file also contains the SIC patients the groups
    overlap and the tests' independence assumption does not hold. Confirm.

    NOTE: p-values too small for a float underflow to 0 and are written as
    '0.00e+00'; report those as "below machine precision", not as exactly zero.

    Args:
        non_sic_path: CSV for the non-SIC group. Defaults to NON_SIC_PATH.
        sic_path: CSV for the SIC group. Defaults to SIC_PATH.
        analysis_path: Output folder, created if missing. Defaults to
            ANALYSIS_PATH.

    Returns:
        pandas.DataFrame with the 'Variable' and 'P_Value' columns that were
        written to disk.

    Raises:
        PermissionError: If the output file cannot be written.
    """
    # Resolve defaults at call time so the notebook-level constants stay the
    # single source of truth and can be changed without redefining the function.
    non_sic_path = NON_SIC_PATH if non_sic_path is None else non_sic_path
    sic_path = SIC_PATH if sic_path is None else sic_path
    analysis_path = ANALYSIS_PATH if analysis_path is None else analysis_path

    # remove duplicate entries so each patient is counted once
    non_sic_df = pd.read_csv(non_sic_path).drop_duplicates(subset='Reference Key')
    sic_df = pd.read_csv(sic_path).drop_duplicates(subset='Reference Key')

    # (test kind, source column, printed label, key in the output table);
    # the order here is the row order of the CSV.
    tests = [
        ('chi2', 'Sex', 'Sex', 'Sex'),
        ('ttest', 'Admission Age (Year) (episode based)', None, 'Admission_Age'),
        ('chi2', 'Episode Death (Y/N)', 'Mortality', 'Mortality'),
        ('chi2', '28_day_mortality', '28-day mortality', '28_day_mortality'),
    ]

    p_values = {}
    for kind, column, label, key in tests:
        if column not in non_sic_df.columns or column not in sic_df.columns:
            continue
        if kind == 'chi2':
            p_value = _chi_square_p_value(non_sic_df[column], sic_df[column], label)
        else:
            p_value = _welch_t_test_p_value(non_sic_df[column], sic_df[column])
        if p_value is None:
            continue
        p_values[key] = f"{p_value:.2e}"
        print(p_values[key])

    results_df = pd.DataFrame(list(p_values.items()), columns=['Variable', 'P_Value'])
    if analysis_path:
        # The output folder is not guaranteed to exist on a fresh checkout.
        os.makedirs(analysis_path, exist_ok=True)
    output_file = os.path.join(analysis_path, 'p-values.csv')
    try:
        results_df.to_csv(output_file, index=False)
    except PermissionError as err:
        # Same exception type, but with a message that says what to check.
        raise PermissionError(
            err.errno,
            "Cannot write p-values (close the file if it is open in another "
            "program and check folder permissions)",
            output_file,
        ) from err
    print(f"P-values saved to {output_file}")
    return results_df

In [27]:
# Run the comparison: prints each contingency table / mean age and its p-value,
# then writes p-values.csv under ANALYSIS_PATH.
calculate_p_values()

Sex:
 [[511085 512821]
 [ 29045  34376]]
4.55e-90
Non SIC mean age:  65.85245227589252 
SIC mean age:  66.52608757351666
6.56e-21
Mortality:
 [[939128  84680]
 [ 41964  21428]]
0.00e+00
28-day mortality:
 [[939512  84394]
 [ 47312  16109]]
0.00e+00


PermissionError: [Errno 13] Permission denied: './data_analysis/p-values.csv'