In [None]:
import pandas as pd
import numpy as np
import os
from lifelines import CoxPHFitter
from statsmodels.stats import multitest

In [None]:
# Load the analysis table; every later cell reads from `df`.
df = pd.read_csv(filepath_or_buffer='data/ukb_ecg_data_balanced_cluster.csv')

In [None]:
# Number of rows per value of `cluster_assign`.
df.loc[:, 'cluster_assign'].value_counts()

In [None]:
# Number of rows per value of `event` (used below as the event indicator for 'All_CVDs').
df.loc[:, 'event'].value_counts()

In [None]:
# Number of rows per value of `cvd`.
df.loc[:, 'cvd'].value_counts()

In [None]:
# Distinct labels present in `cvd`; the outcome names in `cvds` below are matched against these.
df.loc[:, 'cvd'].unique()

In [None]:
# Adjustment covariates: these column names are joined into the Cox model
# formula alongside `insomnia_score` in the modelling cells below.
covars = [
    'Sex',
    'Age',
    'TDI',
    'BMI',
    'SBP',
    'DBP',
    'Glucose',
    'HDLc',
    'LDLc',
    'TG',
    'CHOL',
    'smoking_healthy',
    'alcohol_healthy',
]

In [None]:
# Outcomes to model. The individual names are compared against the `cvd`
# column; 'All_CVDs' is a special label handled separately in the model loops
# (it uses the `time` / `event` columns directly).
_single_cvds = (
    'atrial_fibrillation',
    'angina',
    'chronic_artery_disease',
    'peripheral_artery_disease',
    'heart_failure',
    'stroke',
)
cvds = list(_single_cvds) + ['All_CVDs']

In [None]:
# Cox proportional-hazards models on the full table: one model per outcome in
# `cvds`, each regressing on `insomnia_score` plus the covariates in `covars`.
# The `insomnia_score` row of every model summary is stacked into
# `res_insomnia` (one row per outcome, in the order of `cvds`).
df_data = df.copy()

# The formula does not depend on the outcome, so build it once.
formula = 'insomnia_score + ' + ' + '.join(covars)

# Rows are collected in a list instead of a pre-sized buffer: the number of
# columns in `cph.summary` varies between lifelines versions, so a hard-coded
# width would break on assignment.
summary_rows = []
for cvd in cvds:
    print(cvd)
    if cvd == 'All_CVDs':
        # Composite outcome: use the recorded time/event columns directly.
        df_data['time_cvd'] = df_data['time']
        df_data['event_cvd'] = df_data['event']
    else:
        # Rows whose `cvd` label matches the target are events at `time`;
        # all other rows are non-events with duration `time_censored`.
        is_case = df_data['cvd'] == cvd
        df_data['event_cvd'] = np.where(is_case, 1, 0)
        df_data['time_cvd'] = np.where(is_case, df_data['time'], df_data['time_censored'])

    cph = CoxPHFitter()
    cph.fit(df_data, duration_col='time_cvd', event_col='event_cvd', formula=formula)
    summary_rows.append(cph.summary.loc['insomnia_score'].values)

# 2-D float array, shape (len(cvds), number of summary columns).
res_insomnia = np.asarray(summary_rows, dtype=float)

In [None]:
# HR for cluster 0 and cluster 1 in the case group, compared to the control group
# This cell fits the models on the rows with `cluster_assign == 0`; the
# `insomnia_score` row of each model summary is stacked into `res_cluster0`
# (one row per outcome, in the order of `cvds`).
df_data = df[df['cluster_assign'] == 0].copy()

# The formula does not depend on the outcome, so build it once.
formula = 'insomnia_score + ' + ' + '.join(covars)

# Rows are collected in a list instead of a pre-sized buffer: the number of
# columns in `cph.summary` varies between lifelines versions, so a hard-coded
# width would break on assignment.
summary_rows = []
for cvd in cvds:
    print(cvd)
    if cvd == 'All_CVDs':
        # Composite outcome: use the recorded time/event columns directly.
        df_data['time_cvd'] = df_data['time']
        df_data['event_cvd'] = df_data['event']
    else:
        # Rows whose `cvd` label matches the target are events at `time`;
        # all other rows are non-events with duration `time_censored`.
        is_case = df_data['cvd'] == cvd
        df_data['event_cvd'] = np.where(is_case, 1, 0)
        df_data['time_cvd'] = np.where(is_case, df_data['time'], df_data['time_censored'])

    cph = CoxPHFitter()
    cph.fit(df_data, duration_col='time_cvd', event_col='event_cvd', formula=formula)
    summary_rows.append(cph.summary.loc['insomnia_score'].values)

# 2-D float array, shape (len(cvds), number of summary columns).
res_cluster0 = np.asarray(summary_rows, dtype=float)

In [None]:
# Cox models restricted to the rows with `cluster_assign == 1`; the
# `insomnia_score` row of each model summary is stacked into `res_cluster1`
# (one row per outcome, in the order of `cvds`).
df_data = df[df['cluster_assign'] == 1].copy()

# The formula does not depend on the outcome, so build it once.
formula = 'insomnia_score + ' + ' + '.join(covars)

# Rows are collected in a list instead of a pre-sized buffer: the number of
# columns in `cph.summary` varies between lifelines versions, so a hard-coded
# width would break on assignment.
summary_rows = []
for cvd in cvds:
    print(cvd)
    if cvd == 'All_CVDs':
        # Composite outcome: use the recorded time/event columns directly.
        df_data['time_cvd'] = df_data['time']
        df_data['event_cvd'] = df_data['event']
    else:
        # Rows whose `cvd` label matches the target are events at `time`;
        # all other rows are non-events with duration `time_censored`.
        is_case = df_data['cvd'] == cvd
        df_data['event_cvd'] = np.where(is_case, 1, 0)
        df_data['time_cvd'] = np.where(is_case, df_data['time'], df_data['time_censored'])

    # `cph` from the final iteration is read by the next cell to obtain the
    # summary column names.
    cph = CoxPHFitter()
    cph.fit(df_data, duration_col='time_cvd', event_col='event_cvd', formula=formula)
    summary_rows.append(cph.summary.loc['insomnia_score'].values)

# 2-D float array, shape (len(cvds), number of summary columns).
res_cluster1 = np.asarray(summary_rows, dtype=float)

In [None]:
# Column names of the lifelines summary table, taken from the most recently
# fitted model; used below to label the stacked result arrays.
cols = list(cph.summary.columns)
print(cols)

In [None]:
def _build_result_table(raw_results, group_label):
    """Turn one stacked summary array into a labelled results table.

    Parameters
    ----------
    raw_results : array-like, one row per entry of `cvds`, one column per
        entry of `cols`.
    group_label : str written to the 'Group' column of every row.

    Returns
    -------
    pandas.DataFrame with columns ['Group', 'CVD'] + cols + ['P_FDR'], where
    'P_FDR' holds the FDR-adjusted p-values computed across the rows of this
    table only.
    """
    table = pd.DataFrame(raw_results, columns=cols)
    # Insert the label columns at the front directly, rather than appending
    # and re-selecting: assigning onto a column-subset frame afterwards would
    # trigger pandas' SettingWithCopyWarning.
    table.insert(0, 'Group', group_label)
    table.insert(1, 'CVD', cvds)
    # fdrcorrection returns (rejected, corrected p-values); keep the latter.
    table['P_FDR'] = multitest.fdrcorrection(table['p'], alpha=0.05, method='indep', is_sorted=False)[1]
    return table


# Group labels: cluster 0 -> 'T+', cluster 1 -> 'T-', full table -> 'All'.
res_cluster0 = _build_result_table(res_cluster0, 'T+')
res_cluster1 = _build_result_table(res_cluster1, 'T-')
res_no_cluster = _build_result_table(res_insomnia, 'All')

# Stack the three tables and order them so the groups for one outcome sit together.
res_cluster = pd.concat([res_cluster0, res_cluster1, res_no_cluster], axis=0)
res_cluster = res_cluster.sort_values(['CVD', 'Group'], ascending=True)

In [None]:
# Rename the lifelines summary columns to report-friendly headers and write
# the combined table to disk.
res_cluster = res_cluster.rename(columns={
    'coef': 'beta',
    'exp(coef)': 'HR',
    'se(coef)': 'se (beta)',
    'coef lower 95%': 'beta lower 95% CI',
    'coef upper 95%': 'beta upper 95% CI',
    'exp(coef) lower 95%': 'HR lower 95% CI',
    'exp(coef) upper 95%': 'HR upper 95% CI',
})

# to_csv does not create missing directories, so make sure the output folder
# exists before writing (this would otherwise fail on a fresh checkout).
output_path = 'results/hr_cvds.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
res_cluster.to_csv(output_path, index=False)