# Main results

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
# Inputs: the point-estimate summary and the bootstrap CI summary.
# Output: one long-format CSV written to the 'Merge' subfolder.
base_path = '/your path/cardiomicscore/saved/results/Cindex'
summary_file = os.path.join(base_path, 'cindex_summary.csv')
ci_file = os.path.join(base_path, 'cindex_bootstrap_ci_summary.csv')
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'cindex_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Merge Data ---
# Only the key columns plus the four CI bounds are taken from the CI table.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
print("Merging the two DataFrames...")
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")
id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']

# Part A: Create the c_index part
c_index_part = (
    merged_df[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: Create the delta_c_index part
delta_c_index_part = (
    merged_df[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# Subgroup

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
summary_file = os.path.join(base_path, 'cindex_subgroup_summary.csv')

# List of all Confidence Interval (CI) files to be consolidated (one per subgroup family)
ci_files = [
    'cindex_subgroup_bootstrap_ci_Age.csv',
    'cindex_subgroup_bootstrap_ci_Sex.csv',
    'cindex_subgroup_bootstrap_ci_Lipid.csv',
    'cindex_subgroup_bootstrap_ci_Antihypertensive.csv'
]

# Full path for the final output file
output_file = os.path.join(output_dir, 'cindex_subgroup_final.csv')


# --- 2. Load and consolidate all CI files ---
# The per-subgroup CI tables are stacked row-wise into one table before merging.
print("Loading and consolidating all subgroup CI files...")
ci_df_list = []
try:
    for file_name in ci_files:
        full_path = os.path.join(base_path, file_name)
        df = pd.read_csv(full_path)
        ci_df_list.append(df)
    combined_ci_df = pd.concat(ci_df_list, ignore_index=True)
    print(f"Successfully consolidated {len(ci_files)} CI files.")

except FileNotFoundError as e:
    print(f"Error: File not found. Please check the CI file list and paths. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Load main data file and merge ---
print("Loading the main summary file...")
try:
    summary_df = pd.read_csv(summary_file)
except FileNotFoundError as e:
    print(f"Error: Could not find the summary file {summary_file}.")
    # Same reasoning as above: stop the cell, keep the kernel alive.
    raise

print("Merging the summary data with the CI data...")
merge_keys = ['subgroup', 'outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]

# Perform the merge operation.
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, combined_ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data (same logic as before) ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")

# Part A: c_index part
c_index_cols = merge_keys + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']
c_index_part = (
    merged_df[c_index_cols]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: delta_c_index part
delta_c_index_cols = merge_keys + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']
delta_c_index_part = (
    merged_df[delta_c_index_cols]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)

# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
output_columns = [
    'subgroup', 'outcome', 'n_samples', 'baseline_model',
    'comparison_model', 'metric', 'point_estimate', 'ci_lower', 'ci_upper'
]
final_df = final_df[output_columns]
print("Data reshaping complete.")


# --- 6. Save Results ---
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))
print("\nFinal Data Preview (last 10 rows):")
print(final_df.tail(10))

# Individual biomarker

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
output_file = os.path.join(output_dir, 'cindex_individual_biomarker_final.csv')

# --- 2. Load Data ---
summary_file = os.path.join(base_path, 'cindex_individual_biomarker_summary.csv')
ci_files_list = [
    'cindex_individual_biomarker_protein_bootstrap_ci_summary.csv',
    'cindex_individual_biomarker_metabolite_bootstrap_ci_summary.csv'
]
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)

    ci_df_list = []
    for file_name in ci_files_list:
        full_path = os.path.join(base_path, file_name)
        print(f"Loading CI file: {file_name}")
        df = pd.read_csv(full_path)

        # Tag each CI table with the biomarker type found in its file name;
        # 'protein' is checked before 'metabolite', anything else is 'unknown'.
        df['biomarker'] = next(
            (kind for kind in ('protein', 'metabolite') if kind in file_name),
            'unknown'
        )

        ci_df_list.append(df)

    combined_ci_df = pd.concat(ci_df_list, ignore_index=True)

    print("Data loaded successfully!")

except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Merge Data ---
# 'biomarker' is carried over from the CI table; it is not one of the join keys.
# NOTE(review): if the protein and metabolite CI files can share the same key
# combination, the merge yields one row per match - confirm keys are unique.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper',
    'biomarker'
]
print("Merging the two DataFrames...")
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, combined_ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")
cols_to_keep = ['outcome', 'n_samples', 'baseline_model', 'comparison_model', 'biomarker']

# Part A: Create the c_index part
c_index_part = (
    merged_df[cols_to_keep + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: Create the delta_c_index part
delta_c_index_part = (
    merged_df[cols_to_keep + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[cols_to_keep + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# LDL biomarker

## All participants

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
# Inputs: the LDL-biomarker point-estimate summary and its bootstrap CI summary.
# Output: one long-format CSV written to the 'Merge' subfolder.
base_path = '/your path/cardiomicscore/saved/results/Cindex'
summary_file = os.path.join(base_path, 'cindex_LDL_biomarker_summary.csv')
ci_file = os.path.join(base_path, 'cindex_LDL_biomarker_bootstrap_ci_summary.csv')
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'cindex_LDL_biomarker_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Merge Data ---
# Only the key columns plus the four CI bounds are taken from the CI table.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
print("Merging the two DataFrames...")
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")
id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']

# Part A: Create the c_index part
c_index_part = (
    merged_df[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: Create the delta_c_index part
delta_c_index_part = (
    merged_df[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

## No statins

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
# Inputs: the no-statins LDL-biomarker point-estimate summary and its bootstrap
# CI summary. Output: one long-format CSV written to the 'Merge' subfolder.
base_path = '/your path/cardiomicscore/saved/results/Cindex'
summary_file = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_summary.csv')
ci_file = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_bootstrap_ci_summary.csv')
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'cindex_LDL_biomarker_no_statins_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Merge Data ---
# Only the key columns plus the four CI bounds are taken from the CI table.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
print("Merging the two DataFrames...")
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")
id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']

# Part A: Create the c_index part
c_index_part = (
    merged_df[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: Create the delta_c_index part
delta_c_index_part = (
    merged_df[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

In [None]:
import pandas as pd
import os

# =============================================================================
#  Setup Common Paths
# =============================================================================
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
os.makedirs(output_dir, exist_ok=True)

# Columns shared by the biomarker and panel reshapes below.
id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']
long_cols = id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']

# =============================================================================
#  Part 1: Process the 'biomarker' files in memory
# =============================================================================
print("--- Starting Part 1: Processing 'biomarker' data ---")

# --- 1.1 Set File Paths for biomarker data ---
summary_file = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_summary.csv')
ci_file = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_bootstrap_ci_summary.csv')

# --- 1.2 Load Data for biomarker ---
# Reset the result names first. These names are also assigned by other cells of
# this notebook, so an "is the name defined?" check would accept stale frames
# from a different analysis when a load below fails. Explicit None sentinels
# make "loaded in this run" unambiguous.
summary_df = None
ci_df = None
final_df = None

print("Loading biomarker data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Biomarker data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: Biomarker file not found. {e}")

# --- 1.3 Merge and Reshape Data for biomarker ---
# Runs only when BOTH files were read; a partial load is skipped.
if summary_df is not None and ci_df is not None:
    merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
    ci_cols_to_merge = merge_keys + [
        'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
        'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
    ]
    # how='inner': rows whose keys are missing from either table are dropped.
    merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')

    c_index_part = (
        merged_df[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
        .rename(columns={
            'c_index_combo': 'point_estimate',
            'c_index_combo_ci_lower': 'ci_lower',
            'c_index_combo_ci_upper': 'ci_upper'
        })
        .assign(metric='c_index')
    )

    delta_c_index_part = (
        merged_df[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
        .rename(columns={
            'delta_c_index': 'point_estimate',
            'delta_c_index_ci_lower': 'ci_lower',
            'delta_c_index_ci_upper': 'ci_upper'
        })
        .assign(metric='delta_c_index')
    )

    final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
    final_df = final_df[long_cols]
    print("Biomarker data processed and held in memory.")

print("\n--- Finished Part 1 ---\n")


# =============================================================================
#  Part 2: Process the 'panel' files in memory
# =============================================================================
print("--- Starting Part 2: Processing 'panel' data ---")

# --- 2.1 Set File Paths for panel data ---
summary_file_panel = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_panel_summary.csv')
ci_file_panel = os.path.join(base_path, 'cindex_LDL_biomarker_no_statins_panel_bootstrap_ci_summary.csv')

# --- 2.2 Load Data for panel ---
# Same sentinel approach as Part 1, so a re-run after a failed load cannot pick
# up panel frames left over from a previous execution of this cell.
summary_df_panel = None
ci_df_panel = None
final_df_panel = None

print("Loading panel data...")
try:
    summary_df_panel = pd.read_csv(summary_file_panel)
    ci_df_panel = pd.read_csv(ci_file_panel)
    print("Panel data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: Panel file not found. {e}")

# --- 2.3 Merge and Reshape Data for panel ---
if summary_df_panel is not None and ci_df_panel is not None:
    merge_keys_panel = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
    ci_cols_to_merge_panel = merge_keys_panel + [
        'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
        'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
    ]
    merged_df_panel = pd.merge(summary_df_panel, ci_df_panel[ci_cols_to_merge_panel], on=merge_keys_panel, how='inner')

    c_index_part_panel = (
        merged_df_panel[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
        .rename(columns={
            'c_index_combo': 'point_estimate',
            'c_index_combo_ci_lower': 'ci_lower',
            'c_index_combo_ci_upper': 'ci_upper'
        })
        .assign(metric='c_index')
    )

    delta_c_index_part_panel = (
        merged_df_panel[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
        .rename(columns={
            'delta_c_index': 'point_estimate',
            'delta_c_index_ci_lower': 'ci_lower',
            'delta_c_index_ci_upper': 'ci_upper'
        })
        .assign(metric='delta_c_index')
    )

    final_df_panel = pd.concat([c_index_part_panel, delta_c_index_part_panel], ignore_index=True)
    final_df_panel = final_df_panel[long_cols]
    print("Panel data processed and held in memory.")

print("\n--- Finished Part 2 ---")


# =============================================================================
#  Part 3: Combine results and save the single final file
# =============================================================================
print("\n--- Starting Part 3: Combining and saving final result ---")

# Keep only the result sets that were actually built above (biomarker first, then panel)
dfs_to_combine = [part for part in (final_df, final_df_panel) if part is not None]

# Proceed only if there is at least one dataframe to combine
if dfs_to_combine:
    # Concatenate all dataframes in the list
    combined_df = pd.concat(dfs_to_combine, ignore_index=True)

    # Define the final output file path and save the result.
    # NOTE: this is the same file name the preceding "No statins" cell writes,
    # so running this cell replaces that file with the biomarker+panel table.
    final_output_file = os.path.join(output_dir, 'cindex_LDL_biomarker_no_statins_final.csv')
    combined_df.to_csv(final_output_file, index=False)

    print(f"\nSuccessfully combined {len(dfs_to_combine)} result set(s).")
    print(f"Final file saved to:\n{final_output_file}")

    print("\nFinal Combined Data Preview (first 10 rows):")
    print(combined_df.head(10))
else:
    print("\nNo dataframes were created, so no final file was generated.")

print("\n--- Script finished ---")

# Machine learning models

## Separate models

In [None]:
import pandas as pd
import os

def process_and_merge_cindex_data(model_prefix, base_path, output_dir):
    """
    Build the long-format C-index table for one model and write it to CSV.

    Reads ``cindex_{model_prefix}_summary.csv`` (point estimates) and
    ``cindex_{model_prefix}_bootstrap_ci_summary.csv`` (CI bounds) from
    ``base_path``, inner-joins them on outcome / baseline_model /
    comparison_model / n_samples, stacks the c_index rows on top of the
    delta_c_index rows, and saves ``cindex_{model_prefix}_final.csv`` in
    ``output_dir`` (created if missing).

    Args:
        model_prefix (str): Model tag used in the file names, e.g. 'glm', 'xgb'.
        base_path (str): Directory holding the two input CSV files.
        output_dir (str): Directory that receives the output CSV.

    Returns:
        None. When either input file is missing, a message is printed and
        nothing is written.
    """
    print(f"--- Processing model: {model_prefix} ---")

    # File names are derived from the model prefix.
    summary_path = os.path.join(base_path, f'cindex_{model_prefix}_summary.csv')
    ci_path = os.path.join(base_path, f'cindex_{model_prefix}_bootstrap_ci_summary.csv')
    destination = os.path.join(output_dir, f'cindex_{model_prefix}_final.csv')

    try:
        point_estimates = pd.read_csv(summary_path)
        bootstrap_ci = pd.read_csv(ci_path)
    except FileNotFoundError as e:
        # A missing input only skips this model; the caller can carry on.
        print(f"Error loading files for {model_prefix}. Skipping. Details: {e}")
        return

    # Attach the four CI bounds to the point estimates; unmatched rows are dropped.
    join_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
    ci_bounds = [
        'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
        'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
    ]
    wide = point_estimates.merge(bootstrap_ci[join_keys + ci_bounds], on=join_keys, how='inner')

    # One long block per metric: (metric label, name of its point-estimate column).
    id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']
    long_blocks = []
    for metric_label, estimate_col in (('c_index', 'c_index_combo'),
                                       ('delta_c_index', 'delta_c_index')):
        lower_col = f'{estimate_col}_ci_lower'
        upper_col = f'{estimate_col}_ci_upper'
        block = wide[id_cols + [estimate_col, lower_col, upper_col]].rename(columns={
            estimate_col: 'point_estimate',
            lower_col: 'ci_lower',
            upper_col: 'ci_upper'
        })
        long_blocks.append(block.assign(metric=metric_label))

    long_table = pd.concat(long_blocks, ignore_index=True)
    long_table = long_table[id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]

    # Make sure the destination folder exists before writing.
    os.makedirs(output_dir, exist_ok=True)
    long_table.to_csv(destination, index=False)
    print(f"Successfully processed and saved to: {destination}\n")


# --- Main execution block ---
# Writes one cindex_<prefix>_final.csv per model listed below.
if __name__ == "__main__":
    BASE_INPUT_PATH = '/your path/cardiomicscore/saved/results/Cindex'
    BASE_OUTPUT_PATH = '/your path/cardiomicscore/saved/results/Cindex/Merge'

    # Model prefixes to process, in order
    model_prefixes_to_process = ['glm', 'xgb', 'lgbm', 'rf', 'unweighted']

    # Each model is handled independently; a model with missing inputs is skipped
    # by the function itself.
    for prefix in model_prefixes_to_process:
        process_and_merge_cindex_data(
            model_prefix=prefix,
            base_path=BASE_INPUT_PATH,
            output_dir=BASE_OUTPUT_PATH,
        )

    print("--- All models processed. ---")

## All models

In [None]:
import pandas as pd
import os

def process_model_and_get_df(model_prefix, base_path):
    """
    Build the long-format C-index table for one model and return it.

    Reads ``cindex_{model_prefix}_summary.csv`` and
    ``cindex_{model_prefix}_bootstrap_ci_summary.csv`` from ``base_path``,
    overwrites the summary's 'baseline_model' with 'PANEL', inner-joins the
    two tables on outcome / baseline_model / comparison_model / n_samples,
    stacks the c_index rows on top of the delta_c_index rows, and tags every
    row with a 'model' column holding ``model_prefix``.

    Args:
        model_prefix (str): Model tag used in the file names, e.g. 'glm', 'xgb'.
        base_path (str): Directory holding the two input CSV files.

    Returns:
        pandas.DataFrame: The long-format table, or None when either input
        file is missing.
    """
    print(f"--- Processing model: {model_prefix} ---")

    # File names are derived from the model prefix.
    summary_path = os.path.join(base_path, f'cindex_{model_prefix}_summary.csv')
    ci_path = os.path.join(base_path, f'cindex_{model_prefix}_bootstrap_ci_summary.csv')

    try:
        point_estimates = pd.read_csv(summary_path)
        bootstrap_ci = pd.read_csv(ci_path)
    except FileNotFoundError as e:
        print(f"Error loading files for {model_prefix}. Skipping. Details: {e}")
        return None

    # Every summary row gets baseline_model='PANEL' before the join, so the join
    # only matches CI rows whose baseline_model is 'PANEL'.
    print(f"Standardizing 'baseline_model' in summary data for {model_prefix} to 'PANEL' before merging.")
    point_estimates = point_estimates.assign(baseline_model='PANEL')

    # Attach the four CI bounds to the point estimates; unmatched rows are dropped.
    join_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
    ci_bounds = [
        'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
        'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
    ]
    wide = point_estimates.merge(bootstrap_ci[join_keys + ci_bounds], on=join_keys, how='inner')

    # One long block per metric: (metric label, name of its point-estimate column).
    id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']
    long_blocks = []
    for metric_label, estimate_col in (('c_index', 'c_index_combo'),
                                       ('delta_c_index', 'delta_c_index')):
        lower_col = f'{estimate_col}_ci_lower'
        upper_col = f'{estimate_col}_ci_upper'
        block = wide[id_cols + [estimate_col, lower_col, upper_col]].rename(columns={
            estimate_col: 'point_estimate',
            lower_col: 'ci_lower',
            upper_col: 'ci_upper'
        })
        long_blocks.append(block.assign(metric=metric_label))

    # 'model' is appended as the last column; the caller picks the final column order.
    long_table = pd.concat(long_blocks, ignore_index=True).assign(model=model_prefix)

    print(f"Finished processing for {model_prefix}.")
    return long_table


# --- Main execution block ---
# Collects the per-model long tables and writes them as one combined CSV.
if __name__ == "__main__":
    # Input folder with the per-model CSVs and output folder for the combined file
    BASE_INPUT_PATH = '/your path/cardiomicscore/saved/results/Cindex'
    BASE_OUTPUT_PATH = '/your path/cardiomicscore/saved/results/Cindex/Merge'

    # Models to include in the combined table ('unweighted' is not part of this list)
    model_prefixes_to_process = ['glm', 'xgb', 'lgbm', 'rf']

    # Gather one DataFrame per model; models whose files are missing come back
    # as None and are left out.
    all_models_dataframes = []
    for prefix in model_prefixes_to_process:
        processed_df = process_model_and_get_df(prefix, BASE_INPUT_PATH)
        if processed_df is None:
            continue
        all_models_dataframes.append(processed_df)

    if not all_models_dataframes:
        print("No data was processed.")
    else:
        print("\n--- Combining all processed models into a single file ---")

        # Stack the models in list order and put 'model' next to the other id columns.
        output_columns = [
            'outcome', 'n_samples', 'baseline_model', 'comparison_model', 'model',
            'metric', 'point_estimate', 'ci_lower', 'ci_upper'
        ]
        combined_df = pd.concat(all_models_dataframes, ignore_index=True)[output_columns]

        os.makedirs(BASE_OUTPUT_PATH, exist_ok=True)
        output_file = os.path.join(BASE_OUTPUT_PATH, 'cindex_all_models_final.csv')
        combined_df.to_csv(output_file, index=False)

        print(f"Successfully combined all models and saved to:\n{output_file}")
        print("\nFinal Combined Data Preview:")
        print(combined_df.head())

# Single omics vs multiomics

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
# Inputs: the single-vs-multi-omics point-estimate summary and its bootstrap CI
# summary. Output: one long-format CSV written to the 'Merge' subfolder.
base_path = '/your path/cardiomicscore/saved/results/Cindex'
summary_file = os.path.join(base_path, 'cindex_single_vs_multi_omics_summary.csv')
ci_file = os.path.join(base_path, 'cindex_single_vs_multi_omics_bootstrap_ci_summary.csv')
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'cindex_single_vs_multi_omics_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown (losing every variable), while re-raising only stops this cell.
    raise

# --- 3. Merge Data ---
# Only the key columns plus the four CI bounds are taken from the CI table.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
print("Merging the two DataFrames...")
# how='inner': rows whose keys are missing from either table are dropped.
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
print("Data merge complete.")


# --- 4. Reshape Data ---
# Each wide row becomes two long rows (one per metric) sharing the column names
# point_estimate / ci_lower / ci_upper.
print("Reshaping data into a long format...")
id_cols = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']

# Part A: Create the c_index part
c_index_part = (
    merged_df[id_cols + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: Create the delta_c_index part
delta_c_index_part = (
    merged_df[id_cols + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack the two metric tables (c_index rows first) and fix the column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[id_cols + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# Exclude events that occurred within 2 years

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
# Inputs: point-estimate summary and bootstrap-CI summary for the analysis that
# excludes events within 2 years. Output: one long-format CSV in the Merge folder.
base_path = '/your path/cardiomicscore/saved/results/Cindex'
summary_file = os.path.join(base_path, 'cindex_event_2yrs_summary.csv')
ci_file = os.path.join(base_path, 'cindex_event_2yrs_bootstrap_ci_summary.csv')
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
output_file = os.path.join(output_dir, 'cindex_event_2yrs_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception just stops this cell (and "Run All").
    raise

# --- 3. Merge Data ---
# Columns that jointly identify one row in both tables.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
print("Merging the two DataFrames...")
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
# An inner join silently drops unmatched rows (and duplicates rows on repeated
# keys), so surface any change in row count instead of hiding it.
if len(merged_df) != len(summary_df):
    print(f"Warning: merged table has {len(merged_df)} rows but the summary has {len(summary_df)}; "
          "check that the merge keys match between the two files.")
print("Data merge complete.")


# --- 4. Reshape Data ---
print("Reshaping data into a long format...")
# Identifier columns shared by both halves of the long table (in output order).
id_columns = ['outcome', 'n_samples', 'baseline_model', 'comparison_model']

# Part A: C-index rows (point estimate + CI bounds).
c_index_part = (
    merged_df[id_columns + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: delta C-index rows (point estimate + CI bounds).
delta_c_index_part = (
    merged_df[id_columns + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)


# --- 5. Final Integration ---
# Stack both halves (C-index rows first) and fix the output column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
final_df = final_df[id_columns + ['metric', 'point_estimate', 'ci_lower', 'ci_upper']]
print("Data reshaping complete.")

# --- 6. Save Results ---
# to_csv does not create missing parent folders; create the folder here so this
# cell does not depend on another cell having been run first.
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# Fine-Gray models

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'

# Main data file
summary_file = os.path.join(base_path, 'cindex_competing_risk_summary.csv')

# List of all Confidence Interval (CI) files to be consolidated
ci_files = [
    'cindex_competing_risk_bootstrap_ci_cad_stroke.csv',
    'cindex_competing_risk_bootstrap_ci_hf_af.csv',
    'cindex_competing_risk_bootstrap_ci_pad_vte.csv'
]
output_file = os.path.join(output_dir, 'cindex_competing_risk_final.csv')


# --- 2. Load and consolidate all CI files ---
print("Loading and consolidating all competing risk CI files...")
try:
    # Read every CI file, then stack them into a single table.
    ci_df_list = [pd.read_csv(os.path.join(base_path, file_name)) for file_name in ci_files]
    combined_ci_df = pd.concat(ci_df_list, ignore_index=True)
    print(f"Successfully consolidated {len(ci_files)} CI files.")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check the CI file list and paths. {e}")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception just stops this cell (and "Run All").
    raise

# --- 3. Load main data file and merge ---
print("Loading the main data file...")
try:
    summary_df = pd.read_csv(summary_file)
except FileNotFoundError:
    print(f"Error: Could not find the summary file {summary_file}.")
    raise

print("Merging the summary data with the CI data...")
# Columns that jointly identify one row in both tables.
merge_keys = ['outcome', 'baseline_model', 'comparison_model', 'n_samples']
ci_cols_to_merge = merge_keys + [
    'c_index_combo_ci_lower', 'c_index_combo_ci_upper',
    'delta_c_index_ci_lower', 'delta_c_index_ci_upper'
]
merged_df = pd.merge(summary_df, combined_ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
# An inner join silently drops unmatched rows (and duplicates rows on repeated
# keys), so surface any change in row count instead of hiding it.
if len(merged_df) != len(summary_df):
    print(f"Warning: merged table has {len(merged_df)} rows but the summary has {len(summary_df)}; "
          "check that the merge keys match between the summary and CI files.")
print("Data merge complete.")


# --- 4. Reshape Data (same logic as before) ---
print("Reshaping data into a long format...")

# Part A: c_index part (point estimate + CI bounds)
c_index_cols = merge_keys + ['c_index_combo', 'c_index_combo_ci_lower', 'c_index_combo_ci_upper']
c_index_part = (
    merged_df[c_index_cols]
    .rename(columns={
        'c_index_combo': 'point_estimate',
        'c_index_combo_ci_lower': 'ci_lower',
        'c_index_combo_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Part B: delta_c_index part (point estimate + CI bounds)
delta_c_index_cols = merge_keys + ['delta_c_index', 'delta_c_index_ci_lower', 'delta_c_index_ci_upper']
delta_c_index_part = (
    merged_df[delta_c_index_cols]
    .rename(columns={
        'delta_c_index': 'point_estimate',
        'delta_c_index_ci_lower': 'ci_lower',
        'delta_c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='delta_c_index')
)

# --- 5. Final Integration ---
# Stack both halves (C-index rows first) and fix the output column order.
final_df = pd.concat([c_index_part, delta_c_index_part], ignore_index=True)
output_columns = [
    'outcome', 'n_samples', 'baseline_model',
    'comparison_model', 'metric', 'point_estimate', 'ci_lower', 'ci_upper'
]
final_df = final_df[output_columns]
print("Data reshaping complete.")


# --- 6. Save Results ---
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# Geographic test

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'
summary_file = os.path.join(base_path, 'cindex_train_val_internal_test_summary.csv')
ci_file = os.path.join(base_path, 'cindex_train_val_internal_test_bootstrap_ci_summary.csv')
output_file = os.path.join(output_dir, 'cindex_train_val_test_final.csv')

# --- 2. Load Data ---
print("Loading data...")
try:
    summary_df = pd.read_csv(summary_file)
    ci_df = pd.read_csv(ci_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the paths are correct. {e}")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception just stops this cell (and "Run All").
    raise

# --- 3. Merge Data ---
print("Merging the two DataFrames...")
# Columns that jointly identify one row in both tables.
merge_keys = ['data_split', 'outcome', 'model', 'n_samples', 'n_events']
ci_cols_to_merge = merge_keys + ['c_index_ci_lower', 'c_index_ci_upper']
merged_df = pd.merge(summary_df, ci_df[ci_cols_to_merge], on=merge_keys, how='inner')
# An inner join silently drops unmatched rows (and duplicates rows on repeated
# keys), so surface any change in row count instead of hiding it.
if len(merged_df) != len(summary_df):
    print(f"Warning: merged table has {len(merged_df)} rows but the summary has {len(summary_df)}; "
          "check that the merge keys match between the two files.")
print("Data merge complete.")


# --- 4. Format and Reshape Data ---
print("Formatting data to match the output structure...")
output_columns = ['data_split', 'outcome', 'n_samples', 'comparison_model', 'metric', 'point_estimate', 'ci_lower', 'ci_upper']
# Select only the needed source columns first so the renames cannot collide with
# other columns in merged_df, then rename to the shared output schema and tag
# every row with the metric name.
final_df = (
    merged_df[['data_split', 'outcome', 'n_samples', 'model',
               'c_index', 'c_index_ci_lower', 'c_index_ci_upper']]
    .rename(columns={
        'model': 'comparison_model',
        'c_index': 'point_estimate',
        'c_index_ci_lower': 'ci_lower',
        'c_index_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)
final_df = final_df[output_columns]
print("Data formatting complete.")

# --- 5. Save Results ---
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The consolidated file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview (first 10 rows):")
print(final_df.head(10))

# Clinical scores

In [None]:
import pandas as pd
import os

# --- 1. Set File Paths ---
base_path = '/your path/cardiomicscore/saved/results/Cindex'
output_dir = '/your path/cardiomicscore/saved/results/Cindex/Merge'

# Define input and output filenames
input_file = os.path.join(base_path, 'clinical_scores_cindex_summary.csv')
output_file = os.path.join(output_dir, 'clinical_scores_final.csv')

# --- 2. Load Data ---
print(f"Loading file: {input_file}...")
try:
    df = pd.read_csv(input_file)
    print("Data loaded successfully!")
except FileNotFoundError as e:
    print(f"Error: File not found. Please check if the path is correct. {e}")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception just stops this cell (and "Run All").
    raise

# --- 3. Format Data ---
print("Formatting the data...")
# Rename to the shared output schema and tag every row with the metric name.
# Done as one non-inplace chain rather than an in-place rename plus reassignment.
df = (
    df.rename(columns={
        'predictor': 'comparison_model',
        'cindex': 'point_estimate',
        'cindex_ci_lower': 'ci_lower',
        'cindex_ci_upper': 'ci_upper'
    })
    .assign(metric='c_index')
)

# Columns written to the output file, in order.
output_columns = [
    'outcome',
    'comparison_model',
    'metric',
    'point_estimate',
    'ci_lower',
    'ci_upper'
]
final_df = df[output_columns]
print("Data formatting complete.")

# --- 4. Save Results ---
os.makedirs(output_dir, exist_ok=True)
final_df.to_csv(output_file, index=False)
print(f"\nProcessing complete! The formatted file has been saved to:\n{output_file}")

# --- Print a preview of the result ---
print("\nFinal Data Preview:")
print(final_df.head())