In [1]:
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

In [17]:
# Load the pre-processed COPD table, indexed by participant id ('eid').
copd_path = '../results/copd_table_post_filtering.tsv'
df = pd.read_csv(copd_path, sep='\t', index_col='eid')

# CHD subgroup table generated with the alternate age cutoff for aortic stenosis.
chd_f = 'chd_subgroups_50yo_cutoff.tsv'
chd = pd.read_csv(chd_f, sep='\t', index_col='eid')

# Align the two tables on 'eid'. Columns present in both keep the new CHD
# version under the plain name; the pre-existing copy gets an '_old' suffix.
# Rows left without a 'chd' value after the join are dropped.
df = df.join(chd, lsuffix='_old').dropna(subset=['chd'])

# Sanity check: group sizes and number of events after filtering.
for col in ('chd', 'event'):
    print(df[col].value_counts())

0.0    479765
1.0      2055
Name: chd, dtype: int64
0.0    466808
1.0     15012
Name: event, dtype: int64


In [18]:
#stratified cox regression
# Covariate formulas shared by the all-CHD and the subgroup models.
age_sex_form = (
    'sex + age_at_ac1'
)

full_form_cat = (
    'sex + age_at_ac1 + bmi + ethnicity'
)

# The age/sex model is unstratified; the fully adjusted model is stratified
# on the categorical covariates listed below.
age_sex_strata = None

cat_strata = [
    'smoking',
    'hypertension_comb',
    'asthma',
    'TDI_binned'
]

# regression label -> (data, base formula, strata)
formtypes = {
    'Age/Sex Adjusted': (df, age_sex_form, age_sex_strata),
    'Fully Adjusted': (df, full_form_cat, cat_strata)
}

results = {}
groups = ['All CHD', 'Isolated AoV', 'Noncomplex', 'Control']
res_cols = ['No. at Risk', 'COPD Diagnoses, n (%)', 'HR', '95% CI', 'P-Value']


def _fit_cox(data, formula, strata=None):
    """Fit a Cox model of 'event' over 'yrs_duration' and return the fitter.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'yrs_duration', 'event' and every column named in
        `formula` / `strata`.
    formula : str
        Right-hand-side formula of covariates.
    strata : list of str or None
        Columns to stratify on. None or an empty list fits an
        unstratified model.

    Returns
    -------
    CoxPHFitter
        The fitted model.
    """
    cph = CoxPHFitter()
    cph.fit(
        data,
        duration_col='yrs_duration',
        event_col='event',
        formula=formula,
        # Normalise "no strata" to None so one call covers both model types.
        strata=strata or None)
    return cph


for regtype, (data, form, strata) in formtypes.items():
    res_df = pd.DataFrame(index=groups, columns=res_cols)

    all_chd_form = form + ' + chd'
    subgroup_form = form + ' + complex_chd + noncomplex_chd + isolated_aov'

    # cph1 estimates the effect of any CHD; cph2 estimates the subgroup effects.
    cph1 = _fit_cox(data, all_chd_form, strata)
    cph2 = _fit_cox(data, subgroup_form, strata)

    # Stack both summaries so every CHD term can be looked up in one frame.
    # The CHD terms used below each appear in only one of the two formulas.
    reg_res = pd.concat((cph1.summary, cph2.summary))

    # table row -> (model term to report, boolean mask selecting that group)
    # Controls have no model term, so only their counts are reported.
    group_masks = {
        'All CHD': ('chd', data['chd'] == 1),
        'Noncomplex': ('noncomplex_chd', data['noncomplex_chd'] == 1),
        'Isolated AoV': ('isolated_aov', data['isolated_aov'] == 1),
        'Control': (None, data['chd'] == 0)
    }

    is_event = data['event'] == 1
    for group, (label, mask) in group_masks.items():
        n = int(mask.sum())
        events = int((mask & is_event).sum())
        event_pc = 100 * events/n
        res_df.loc[group, 'No. at Risk'] = f'{n:,}'
        res_df.loc[group, 'COPD Diagnoses, n (%)'] = f'{events:,} ({round(event_pc, 1)})'
        if label:
            term = reg_res.loc[label]
            hr_llim = term['exp(coef) lower 95%']
            hr_ulim = term['exp(coef) upper 95%']

            # HR stays numeric (rounded) so it can still be used as a number.
            res_df.loc[group, 'HR'] = round(term['exp(coef)'], 2)
            # Fixed two-decimal formatting keeps trailing zeros, e.g.
            # '(2.20, 3.01)' instead of '(2.2, 3.01)'.
            res_df.loc[group, '95% CI'] = f'({hr_llim:.2f}, {hr_ulim:.2f})'
            res_df.loc[group, 'P-Value'] = np.format_float_scientific(term['p'], 2)

    results[regtype] = res_df

In [19]:
#build bav sensitivity coxreg table
# The counts are shown once, taken from the age/sex-adjusted frame; the fully
# adjusted frame contributes only its regression columns.
count_cols = ['No. at Risk', 'COPD Diagnoses, n (%)']
t2 = results['Fully Adjusted'].drop(columns=count_cols)
stat_cols = list(t2.columns)
# Selecting by name pins the row order of the final table: counts first,
# then the regression columns in the same order as in t2.
t1 = results['Age/Sex Adjusted'][count_cols + stat_cols]

# Take the column labels from the results themselves rather than repeating
# the literal list, so labels cannot drift out of step with the values.
groups = list(t1.index)

# Rows = statistics, columns = groups; '-' marks cells that were never filled
# (the Control row has no HR / CI / P-value).
tab = pd.concat((t1, t2), axis=1).fillna('-').T.to_numpy()

# Insert one blank row ahead of each regression section to carry its heading.
# The later position goes first so it is not shifted by the earlier insert.
for row in [len(count_cols) + len(stat_cols), len(count_cols)]:
    tab = np.insert(tab, row, [''] * len(groups), 0)

row_labels = [
    *count_cols,
    'Age/Sex Adjusted:',
    *stat_cols,
    'Fully Adjusted:',
    *stat_cols,
]

main_df = pd.DataFrame(tab, index=row_labels, columns=groups)
main_df

Unnamed: 0,All CHD,Isolated AoV,Noncomplex,Control
No. at Risk,2055,673,1251,479765
"COPD Diagnoses, n (%)",158 (7.7),43 (6.4),103 (8.2),"14,854 (3.1)"
Age/Sex Adjusted:,,,,
HR,2.58,2.05,2.82,-
95% CI,"(2.2, 3.01)","(1.52, 2.76)","(2.32, 3.42)",-
P-Value,2.53e-32,2.69e-06,1.21e-25,-
Fully Adjusted:,,,,
HR,2.25,1.89,2.46,-
95% CI,"(1.92, 2.63)","(1.4, 2.55)","(2.02, 2.98)",-
P-Value,4.74e-24,3.15e-05,1.14e-19,-
