In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from lifelines import CoxPHFitter
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

sys.path.append(os.path.abspath('..'))
from utils.utils_constants import (VESSEL_NEPTUNE_PAT_INFO_W_SCORE_W_FEATURE_PATH as  VESSEL_PAT_INFO_W_SCORE_W_FEATURE_PATH,
                                   DISEASE_TYPES, ARTERY_TYPES,
)
import warnings
import pandas as pd

# Turn off pandas' chained-assignment check (SettingWithCopyWarning).
pd.options.mode.chained_assignment = None  # pandas default is 'warn'
# NOTE: this filter silences every Python warning raised in the notebook,
# not only the pandas one above.
warnings.filterwarnings("ignore")

# Ordinal level (0-3) for each textual severity grade.
SEVERITY_MAPPING = {
    grade: level
    for level, grade in enumerate((
        '0:absent',
        '1:mild (1-25%)',
        '2:moderate (26-50%)',
        '3:severe (>50%)',
    ))
}

# Column groups passed to the Cox models in the analysis cells.
DEMOGRAPHICS = [
    'PAT_Sex',
    'PAT_Race',
    'PAT_Hispanic',
    'PAT_AgeV3',
]
CLININCAL_DATA = [
    'PAT_Cohort',
    'eGFRatBx',
    'UPCRatBx',
]
DESCRIPTOR = [
    'ArterioSclerosis',
    "ArterialHyalinosis",
]

In [2]:
# Inserted before ".csv" in the aggregated-feature file name when the
# patient table is loaded.
suffix = "_measurements_exclude_hya_manual"

In [3]:
def binary_map(x, positive_value):
    """Return 1 when ``x`` equals ``positive_value``, otherwise 0."""
    if x == positive_value:
        return 1
    return 0

In [4]:
# for x in pat_df.columns:
#     if "Num" in x:
#         print(x)

In [5]:
# Read the patient-level table; `suffix` selects which variant of the CSV.
agg_feature_path = VESSEL_PAT_INFO_W_SCORE_W_FEATURE_PATH.replace(".csv", f"{suffix}.csv")
pat_df_raw = pd.read_csv(agg_feature_path)

# Keep rows with a positive Num_All_Arteries and no missing clinical
# covariate.  The result is built as a new, independent frame instead of
# calling dropna(inplace=True) on a boolean-filtered slice.
pat_df = (
    pat_df_raw[pat_df_raw["Num_All_Arteries"] > 0]
    .dropna(subset=CLININCAL_DATA)
    .copy()
)
print("Number of Samples:", len(pat_df))

# Print out the number of samples after removing NaN values
# print("Number of Samples:", len(pat_df))

# # Calculating medians and IQRs for eGFR and UPCR
# egfr_median = np.median(pat_df['eGFRatBx'])
# egfr_25th = np.percentile(pat_df['eGFRatBx'], 25)
# egfr_75th = np.percentile(pat_df['eGFRatBx'], 75)
# egfr_iqr = egfr_75th - egfr_25th

# upcr_median = np.median(pat_df['UPCRatBx'])
# upcr_25th = np.percentile(pat_df['UPCRatBx'], 25)
# upcr_75th = np.percentile(pat_df['UPCRatBx'], 75)
# upcr_iqr = upcr_75th - upcr_25th

# # Output results
# print(f"Median eGFR: {egfr_median}, IQR: ({egfr_25th}, {egfr_75th})")
# print(f"Median UPCR: {upcr_median}, IQR: ({upcr_25th}, {upcr_75th})")

# under_18_count = np.sum(pat_df['PAT_AgeV3'] < 18)
# total_samples = len(pat_df)
# under_18_percent = under_18_count / total_samples * 100

# # Output the count and percentage
# print("Number of samples under 18:", under_18_count)
# print("Percentage of samples under 18:", under_18_percent)


Number of Samples: 225


In [8]:
# Display the InterstitialFibrosis column of the filtered patient table.
pat_df["InterstitialFibrosis"]

0       8.0
1      71.0
2       1.0
3      25.0
4       NaN
       ... 
242    55.0
243    20.0
244     6.0
245    69.0
246     3.0
Name: InterstitialFibrosis, Length: 225, dtype: float64

In [None]:
# # Calculate race percentages
# race_counts = pat_df['PAT_Race'].value_counts(normalize=False) 
# print("Race distribution:")
# for race, percentage in race_counts.items():
#     print(f"{race}: {percentage:.2f}%")

In [None]:
# Collapse each categorical column to a 0/1 indicator of one reference label.
# binary_map only tests equality, so every other value maps to 0
# (presumably including NaN from missing entries - verify if that matters).
binary_positive_labels = {
    'PAT_Hispanic': '1: Hispanic or Latino',
    'PAT_Race': '3: Black/African American',
    'PAT_Sex': '2: Female',
    'PAT_Cohort': '2 - MCD',
    'Immunosupression_30dBfOrAtBx': '1: Yes',
}
for column, positive_label in binary_positive_labels.items():
    pat_df[column] = pat_df[column].apply(binary_map, positive_value=positive_label)

# Textual severity grades -> ordinal levels; labels missing from
# SEVERITY_MAPPING become NaN.
for column in ('ArterioSclerosis', 'ArterialHyalinosis'):
    pat_df[column] = pat_df[column].map(SEVERITY_MAPPING)

# Standardise the numerical covariates; the scaled values overwrite the
# original columns.
covariates_to_normalize = ['PAT_AgeV3', 'eGFRatBx', 'UPCRatBx', 'ArterioSclerosis', 'ArterialHyalinosis']  # add numerical columns here
scaler = StandardScaler()
pat_df[covariates_to_normalize] = scaler.fit_transform(pat_df[covariates_to_normalize])

# Outcome columns: unparseable durations become NaN; the event flag is
# encoded as 0/1 (astype(int) raises if any label is not in the mapping).
duration_col, event_col = 'DaysBXtoESRDorEGFR40_LR', 'ESRDorEGFR40BX_LR'
pat_df[duration_col] = pd.to_numeric(pat_df[duration_col], errors='coerce')
event_codes = {'1: Yes': 1, '0: No': 0}
pat_df[event_col] = pat_df[event_col].map(event_codes).astype(int)

# # Calculating percentages for the cohort
# total_samples = len(pat_df)
# mcd_percent = np.sum(pat_df['PAT_Cohort']) / total_samples * 100
# fsgs_percent = (total_samples - np.sum(pat_df['PAT_Cohort'])) / total_samples * 100
# male_percent = (total_samples - np.sum(pat_df['PAT_Sex'])) / total_samples * 100
# female_percent = np.sum(pat_df['PAT_Sex']) / total_samples * 100

# # Output results
# print(f"Percentage of MCD: {np.sum(pat_df['PAT_Cohort'])} ({mcd_percent}%), FSGS: {(total_samples - np.sum(pat_df['PAT_Cohort']))} ({fsgs_percent}%)")
# print(f"Percentage of Males: {(total_samples - np.sum(pat_df['PAT_Sex']))} ({male_percent}%), Females: {np.sum(pat_df['PAT_Sex'])} ({female_percent}%)")



# # Hispanic vs. Non-Hispanic
# hispanic_count = np.sum(pat_df['PAT_Hispanic'])
# non_hispanic_count = total_samples - hispanic_count
# print(f"Hispanic: {hispanic_count} ({hispanic_count / total_samples * 100:.2f}%), Non-Hispanic: {non_hispanic_count} ({non_hispanic_count / total_samples * 100:.2f}%)")

In [None]:
# Frequency table (count and percentage, NaN included) of ArterioSclerosis.
# NOTE(review): when the notebook is run top to bottom this column has
# already been standardised by the preceding cell, so the index shows scaled
# values rather than the 0-3 grades - confirm that is intended.
sclerosis = pat_df['ArterioSclerosis']
result = pd.DataFrame({
    'Count': sclerosis.value_counts(dropna=False),
    'Percentage': sclerosis.value_counts(normalize=True, dropna=False) * 100,
})
print(result)

# Per artery-type count column: how many rows have a positive value, and the
# column total.
columns_of_interest = ['Num_Arterioles', 'Num_Interlobular_Arteries', 'Num_Arcuate_Arteries']
artery_counts = pat_df[columns_of_interest]
result = pd.DataFrame({
    'Count of >0': artery_counts.gt(0).sum(),
    'Sum of Values': artery_counts.sum(),
})
print(result)

In [None]:
# feature_names = ['Intima Area Ratio', 'Media Area Ratio', 'Lumen Area Ratio', 'Hyalinosis Area Ratio',
#                  'Intima Average', 'Intima Peak Height', 'Media Average', 'Media Peak Height', 'Ratio Average', 'Ratio Peak Height']

# Features retained for each artery type.
selected_features_by_type = {
    'Arterioles': {
        'Hyalinosis Area Ratio',
        'Intima Area Ratio',
        'Intima Median',
        'Log Artery Area',
        'Lumen Area Ratio',
    },
    'Interlobular Arteries': {'Intima Area Ratio', 'Ratio Average'},
    'Arcuate Arteries': {'Intima Area Ratio', 'Ratio Median'},
}

# Patient-level aggregation prefixes used in the column names.
AGG_METRICS = ["Max", "Median", "75th"]

# artery type -> list of column names, e.g. "Max_Intima_Area_Ratio_in_Arterioles"
agged_features_by_type = {}
# artery type -> list of column names, e.g. "Max_<disease>_Severity_in_Arterioles"
agged_scores_by_type = {}

for artery_type in ARTERY_TYPES:
    # The feature collections are sets of strings, whose iteration order can
    # change between kernel sessions (string hash randomisation).  Sorting
    # makes the resulting column order - and hence printed output and model
    # term order - reproducible.
    agged_features_by_type[artery_type] = [
        '_'.join([agg_metric, feature_name, "in", artery_type]).replace(" ", "_")
        for feature_name in sorted(selected_features_by_type[artery_type])
        for agg_metric in AGG_METRICS
    ]

    agged_scores_by_type[artery_type] = [
        '_'.join([agg_metric, disease, "Severity", "in", artery_type]).replace(" ", "_")
        for disease in DISEASE_TYPES
        # Hyalinosis score columns are skipped for arcuate arteries.
        if not (artery_type == "Arcuate Arteries" and disease == "Hyalinosis")
        for agg_metric in AGG_METRICS
    ]
    

In [None]:
# Show the generated aggregated-feature column names per artery type.
agged_features_by_type

In [None]:
# Standardise the aggregated feature columns, fitting a fresh scaler for each
# artery type; the scaled values overwrite the original columns.
for artery_type in ARTERY_TYPES:
    features = agged_features_by_type[artery_type]
    print(features)
    scaler = StandardScaler()
    scaler.fit(pat_df[features])
    pat_df[features] = scaler.transform(pat_df[features])

In [None]:
def survival_analysis(pat_df_selected, feature_name, percentile, c=""):
    """Log-rank comparison of two groups split at a quantile of one feature.

    Rows whose ``feature_name`` value is strictly greater than the
    ``percentile`` quantile form the "Above" group; all other rows (including
    rows where the comparison is False because the value is NaN) form the
    "Below or Equal" group.  The groups are compared with a log-rank test on
    the ``DaysBXtoESRDorEGFR40_LR`` / ``ESRDorEGFR40BX_LR`` columns.  Kaplan-
    Meier curves are drawn only when the log-rank p-value is below 0.1.

    The input frame is left untouched: grouping uses a boolean mask rather
    than a ``Group`` column written into ``pat_df_selected``.

    Parameters
    ----------
    pat_df_selected : pandas.DataFrame
        Table containing ``feature_name`` and the two outcome columns.
    feature_name : str
        Column to split on.
    percentile : float
        Quantile passed to ``Series.quantile``; ``.5`` yields "Median" labels.
    c : str, optional
        Tag placed in parentheses at the start of the plot title.

    Raises
    ------
    AssertionError
        If the split leaves one of the two groups empty.
    """
    feature_values = pat_df_selected[feature_name]
    is_above = feature_values > feature_values.quantile(percentile)

    if percentile == .5:
        above_label, below_label = 'Above Median', 'Below or Equal Median'
    else:
        above_label = f'Above {percentile*100}th'
        below_label = f'Below or Equal {percentile*100}th'

    assert is_above.any() and not is_above.all(), (
        f"{feature_name}: splitting at quantile {percentile} leaves one group empty"
    )

    # "Above" comes first, which is also the alphabetical order of the labels.
    groups = []
    for label, mask in ((above_label, is_above), (below_label, ~is_above)):
        subset = pat_df_selected[mask]
        groups.append((label, subset["DaysBXtoESRDorEGFR40_LR"], subset["ESRDorEGFR40BX_LR"]))

    (_, t_above, e_above), (_, t_below, e_below) = groups
    result = logrank_test(t_above, t_below, event_observed_A=e_above, event_observed_B=e_below)
    p_str = "p<0.05" if result.p_value < 0.05 else f"p={result.p_value:.3f}"

    # Only features with at least a trend towards separation are plotted.
    if result.p_value < 0.1:
        fig, ax = plt.subplots()
        for label, T, E in groups:
            km = KaplanMeierFitter()
            km.fit(durations=T, event_observed=E, label=label)
            km.plot_survival_function(ax=ax, show_censors=True)

        ax.set_xlabel('Days from Biopsy to ESRD or EGFR < 40', fontsize=12)
        ax.set_ylabel('Survival Probability', fontsize=12)
        ax.legend()
        # Negative y/pad place the title underneath the axes.
        ax.set_title(f"({c}){feature_name}: \n log rank test: {p_str}", fontsize=12, y=-0.25, pad=-5)
        plt.show()
        # Release the figure; this function is called in a loop over features.
        plt.close(fig)

def cox_model_analysis(df, base, features, uni=False,
                       coef_threshold=0.1, penalizer=0.01, l1_ratio=1):
    """Screen candidate features with a penalised Cox model, then refit with base covariates.

    When ``features`` is non-empty, a Cox model is first fitted on
    ``features`` alone and the features whose absolute coefficient exceeds
    ``coef_threshold`` are kept, ordered by decreasing absolute coefficient.
    A second model is then fitted on ``base`` plus the kept features and its
    concordance index is printed.  When ``features`` is empty only the
    ``base`` model is fitted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``DaysBXtoESRDorEGFR40_LR`` and ``ESRDorEGFR40BX_LR``
        columns plus every column named in ``base`` and ``features``.
    base : list of str
        Covariates always included in the final model.
    features : list of str
        Candidate covariates to screen.
    uni : bool, optional
        If True, run ``survival_analysis`` (median split) for each kept feature.
    coef_threshold : float, optional
        Minimum absolute coefficient for a candidate to be kept.
    penalizer, l1_ratio : float, optional
        Forwarded to both ``CoxPHFitter`` instances.

    Returns
    -------
    (CoxPHFitter, list of str)
        The final fitted model and the kept feature names.
    """
    duration_col, event_col = 'DaysBXtoESRDorEGFR40_LR', 'ESRDorEGFR40BX_LR'

    selected_features = features
    if len(features) > 0:
        screening_model = CoxPHFitter(penalizer=penalizer, l1_ratio=l1_ratio)
        screening_model.fit(df, duration_col=duration_col, event_col=event_col,
                            formula="+".join(features))
        # Rank candidates by coefficient magnitude and keep those above the threshold.
        abs_coef = screening_model.summary['coef'].abs().sort_values(ascending=False)
        selected_features = abs_coef[abs_coef > coef_threshold].index.tolist()
        if uni:
            for feature_name in selected_features:
                survival_analysis(df, feature_name, .5, c="")

    cph = CoxPHFitter(penalizer=penalizer, l1_ratio=l1_ratio)
    cph.fit(df, duration_col=duration_col, event_col=event_col,
            formula="+".join(base + selected_features))
    print(f"Cox Model Concordance: {cph.concordance_index_:.2f}")
    return cph, selected_features


def cox_model_single(df, base, feature):
    """Fit a penalised Cox model on ``base`` covariates plus one ``feature``.

    ``base`` may be an empty list, giving a model with ``feature`` only.
    Returns the fitted ``CoxPHFitter``.
    """
    terms = base + [feature]
    model = CoxPHFitter(penalizer=0.01, l1_ratio=1)
    model.fit(
        df,
        duration_col='DaysBXtoESRDorEGFR40_LR',
        event_col='ESRDorEGFR40BX_LR',
        formula="+".join(terms),
    )
    return model


In [None]:
# --- Arcuate arteries ---
agged_features = agged_features_by_type["Arcuate Arteries"]
score_features = agged_scores_by_type["Arcuate Arteries"]
base_covariates = DEMOGRAPHICS + CLININCAL_DATA

# Complete-case subset: every column used by any model below must be present.
required_columns = base_covariates + DESCRIPTOR + score_features + agged_features
pat_df_selected = pat_df.dropna(subset=required_columns)
print(pat_df_selected.shape)

# Concordance of the base model alone, then with each candidate feature set
# screened by cox_model_analysis; the last call also runs the median-split
# survival analysis for the retained features.
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, [])
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, DESCRIPTOR)
cph, selected_features = cox_model_analysis(pat_df_selected, base_covariates, score_features)
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, agged_features, uni=True)

# Hazard ratio, 95% CI and p-value for each retained score feature, without
# and with adjustment for the base covariates.
hr_columns = ["exp(coef)", "exp(coef) lower 95%", "exp(coef) upper 95%", "p"]
for feature in selected_features:
    cph = cox_model_single(pat_df_selected, [], feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Unadjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")

    cph = cox_model_single(pat_df_selected, base_covariates, feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Adjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")


In [None]:
# cph = cox_model_analysis(pat_df_selected, DEMOGRAPHICS + CLININCAL_DATA , agged_features, uni=True)
# summary = cph.summary
# summary['abs_coef'] = summary['coef'].abs()
# summary_sorted = summary.sort_values(by='abs_coef', ascending=False)
# # Iterate over the sorted summary and print relevant statistics
# for index, row in summary_sorted.iterrows():
#     feature = index  # The feature name
#     if feature not in agged_features: continue
#     hr, hr_lower, hr_upper, p = row["exp(coef)"], row["exp(coef) lower 95%"], row["exp(coef) upper 95%"], row["p"]
#     print(f"{feature} HR: {hr:.3f} (95% CI: {hr_lower:.3f} - {hr_upper:.3f}) p: {p:.3g}")


In [None]:
# suffix = "_measurements_exclude_hya_manual"
# collected_features = pd.read_csv(FEATURES_PATH.replace(".xlsx", f"{suffix}.csv"))
# collected_features.shape

# suffix = "_measurements_exclude_hya_manual_lumen_convex"
# collected_features_convex = pd.read_csv(FEATURES_PATH.replace(".xlsx", f"{suffix}.csv"))
# collected_features_convex

# # # Find the rows where "Intima Convexity" is greater than 0.8
# mask = collected_features["Artery Area"] < 200000

# # # Replace the rows in collected_features with those from collected_features_convex where the condition is met
# collected_features.loc[mask] = collected_features_convex.loc[mask]


# # collected_features = collected_features[collected_features["Lumen Area Ratio"] > 0.02]
# collected_features = collected_features[collected_features["Artery Area"] > 50000]
# collected_features = collected_features[~collected_features["Image Name"].isin(discard)]

# print(collected_features.shape)
# for artery_type in ARTERY_TYPES:
#     print(artery_type)
#     disease_type = "Arteriosclerosis"
#     feature_names = ['Intima Average', 'Intima Peak Height', 'Media Average', 'Media Peak Height', 
#                     'Ratio Average', 'Ratio Peak Height']
#     collected_features_selected = collected_features.loc[(collected_features["Artery Type"] == artery_type)
#                                                         #  &(collected_features["Hyalinosis Severity"] == 0)
#                                                         ]
#     collected_features_selected = collected_features_selected.dropna(subset=feature_names)
#     # collected_features_selected = collected_features_selected[~collected_features_selected["Image Name"].isin(discard)]
#     # print(collected_features_selected.shape)

#     fig, axs = plt.subplots(1, 6, figsize=(25, 5))
#     for i, feature_name in enumerate(feature_names):
#         violin_plots(collected_features_selected, feature_name, disease_type, axs[i], artery_type)  
#     plt.tight_layout()
#     plt.show()


In [None]:
# --- Interlobular arteries ---
agged_features = agged_features_by_type["Interlobular Arteries"]
score_features = agged_scores_by_type["Interlobular Arteries"]
base_covariates = DEMOGRAPHICS + CLININCAL_DATA

# Complete-case subset: every column used by any model below must be present.
required_columns = base_covariates + DESCRIPTOR + score_features + agged_features
pat_df_selected = pat_df.dropna(subset=required_columns)
print(pat_df_selected.shape)

# Concordance of the base model alone, then with each candidate feature set
# screened by cox_model_analysis; the last call also runs the median-split
# survival analysis for the retained features.
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, [])
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, DESCRIPTOR)
cph, selected_features = cox_model_analysis(pat_df_selected, base_covariates, score_features)
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, agged_features, uni=True)

# Hazard ratio, 95% CI and p-value for each retained score feature, without
# and with adjustment for the base covariates.
hr_columns = ["exp(coef)", "exp(coef) lower 95%", "exp(coef) upper 95%", "p"]
for feature in selected_features:
    cph = cox_model_single(pat_df_selected, [], feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Unadjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")

    cph = cox_model_single(pat_df_selected, base_covariates, feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Adjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")


In [None]:
# cph = cox_model_analysis(pat_df_selected, DEMOGRAPHICS + CLININCAL_DATA , agged_features, uni=False)

# summary = cph.summary
# summary['abs_coef'] = summary['coef'].abs()
# summary_sorted = summary.sort_values(by='abs_coef', ascending=False)
# # Iterate over the sorted summary and print relevant statistics
# for index, row in summary_sorted.iterrows():
#     feature = index  # The feature name
#     if feature not in agged_features: continue
#     hr, hr_lower, hr_upper, p = row["exp(coef)"], row["exp(coef) lower 95%"], row["exp(coef) upper 95%"], row["p"]
#     print(f"{feature} HR: {hr:.3f} (95% CI: {hr_lower:.3f} - {hr_upper:.3f}) p: {p:.3g}")


In [None]:
# --- Arterioles ---
agged_features = agged_features_by_type["Arterioles"]
score_features = agged_scores_by_type["Arterioles"]
base_covariates = DEMOGRAPHICS + CLININCAL_DATA

# Complete-case subset: every column used by any model below must be present.
required_columns = base_covariates + DESCRIPTOR + score_features + agged_features
pat_df_selected = pat_df.dropna(subset=required_columns)
print(pat_df_selected.shape)

# Concordance of the base model alone, then with each candidate feature set
# screened by cox_model_analysis.
# NOTE(review): unlike the arcuate / interlobular cells, `selected_features`
# here comes from the aggregated-feature model (not the severity-score model)
# and the median-split survival plots are disabled - confirm this is intended.
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, [])
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, DESCRIPTOR)
cph, _ = cox_model_analysis(pat_df_selected, base_covariates, score_features)
cph, selected_features = cox_model_analysis(pat_df_selected, base_covariates, agged_features, uni=False)

# Hazard ratio, 95% CI and p-value for each retained feature, without and
# with adjustment for the base covariates.
hr_columns = ["exp(coef)", "exp(coef) lower 95%", "exp(coef) upper 95%", "p"]
for feature in selected_features:
    cph = cox_model_single(pat_df_selected, [], feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Unadjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")

    cph = cox_model_single(pat_df_selected, base_covariates, feature)
    hr, lower_95, upper_95, p = cph.summary.loc[feature, hr_columns]
    print(f"Adjusted {feature}: HR {hr:.2f} ({lower_95:.2f}-{upper_95:.2f}), p: {p:.3f}")


In [None]:
# # cph = cox_model_analysis(pat_df_selected, DEMOGRAPHICS + CLININCAL_DATA , agged_features, uni=True)
# cph = cox_model_analysis(pat_df_selected, DEMOGRAPHICS + CLININCAL_DATA , agged_features, uni=False)

# summary = cph.summary
# summary['abs_coef'] = summary['coef'].abs()
# summary_sorted = summary.sort_values(by='abs_coef', ascending=False)
# # Iterate over the sorted summary and print relevant statistics
# for index, row in summary_sorted.iterrows():
#     feature = index  # The feature name
#     if feature not in agged_features: continue
#     hr, hr_lower, hr_upper, p = row["exp(coef)"], row["exp(coef) lower 95%"], row["exp(coef) upper 95%"], row["p"]
#     print(f"{feature} HR: {hr:.3f} (95% CI: {hr_lower:.3f} - {hr_upper:.3f}) p: {p:.3g}")