In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# --- Dataset location
# Root of the BIDS dataset. The UKB_CARDIAC_BIDS_DIR environment variable, when set to a
# non-empty value, overrides the default below so the notebook can run on another machine
# without editing this cell.
data_path = os.environ.get('UKB_CARDIAC_BIDS_DIR') or '/media/jaume/DATA/Data/Urblauna_SFTP/UKB_Cardiac_BIDS'
split_filename = 'derivatives/nimosef_flip_logs/train_val_test_split.json'
number_patients = 1000

# --- Training
baseline_experiment_name = 'experiment_20250310_175059' # Baseline v1
new_model_experiment_name = 'experiment_20250311_001426' # Motion v1

derivatives_path = os.path.join(data_path, 'derivatives')

# For the training
dataset_split = 'train'

res_factor_z = 1  # Same as original

# Every image/result folder of this comparison lives under the same log directory.
# NOTE(review): the new-model images are also read from the 'baseline' sub-folder,
# exactly as in the original cell -- confirm this is the intended location.
baseline_logs_path = os.path.join(derivatives_path, 'nimosef_flip_logs', 'baseline')
img_folder_baseline = os.path.join(baseline_logs_path, f"imgs_train_{baseline_experiment_name}_res_factor_{res_factor_z}")
img_folder_new_model = os.path.join(baseline_logs_path, f"imgs_train_{new_model_experiment_name}_res_factor_{res_factor_z}")

# Folder holding the metric tables read by the following cells (created if missing).
save_folder_results = os.path.join(baseline_logs_path, f"results_train_comparison_res_factor_{res_factor_z}")
os.makedirs(save_folder_results, exist_ok=True)

# Metadata file
metadata_filename = os.path.join(derivatives_path, 'metadata_participants_ALL.tsv')

In [None]:
# Image metrics
# Locations of the image-based metric tables inside the results folder.
volumes_df_path = os.path.join(save_folder_results, 'volumes_imgs.parquet')
final_df_path = os.path.join(save_folder_results, 'connected_components.parquet')
dice_df_path = os.path.join(save_folder_results, 'dice_score.parquet')

# Numeric label codes -> class names used as column headers further down.
label_map = {1.0: 'LV', 2.0: 'MYO', 3.0: 'RV'}

df_volumes = pd.read_parquet(volumes_df_path)
# Connected-component table, with its numeric 'label' column replaced by the class names.
final_df = pd.read_parquet(final_df_path).assign(
    label=lambda cc_frame: cc_frame['label'].map(label_map)
)
df_dice_final = pd.read_parquet(dice_df_path)

# Quick look at the first rows of each table.
for metric_frame in (df_volumes, final_df, df_dice_final):
    print(metric_frame.head())

In [None]:
# PC metrics
# (presumably "point cloud" metrics -- TODO confirm)
# Paths of the Chamfer, HF95 and HF distance tables inside the results folder.
path_chamfer, path_hf95, path_hf = (
    os.path.join(save_folder_results, parquet_name)
    for parquet_name in ('chamfer.parquet', 'hf95.parquet', 'hf.parquet')
)

# Load the three tables in the same order as their paths.
chamfer_distances, hf95_distances, hf_distances = map(
    pd.read_parquet, (path_chamfer, path_hf95, path_hf)
)

# Quick look at the first rows of each table.
for distance_frame in (chamfer_distances, hf95_distances, hf_distances):
    print(distance_frame.head())

In [None]:
# Img metric processing
def format_mean_std(mean, std):
    """Return ``"<mean> ± <std>"`` with both numbers printed with two decimals."""
    return "{:.2f} ± {:.2f}".format(mean, std)

# === CC
# Wide tables of the connected-component counts: one row per (Subject, segmentation_type)
# pair and one column per label. 'Subject' ends up as the index and 'segmentation_type'
# as a regular column so it can be used as a group key below.

# Average component count; duplicate rows for the same cell are averaged.
df_pivot_avg_cc = (
    final_df
    .pivot_table(values='avg_components',
                 index=['Subject', 'segmentation_type'],
                 columns='label',
                 aggfunc='mean')
    .reset_index()
    .set_index('Subject')
)

# Maximum component count; duplicate rows for the same cell keep the largest value.
df_pivot_max_cc = (
    final_df
    .pivot_table(values='max_components',
                 index=['Subject', 'segmentation_type'],
                 columns='label',
                 aggfunc='max')
    .reset_index()
    .set_index('Subject')
)

# Mean and std across subjects of every label column, per segmentation_type
# (maximum component counts).
summary_max_cc = df_pivot_max_cc.groupby('segmentation_type').agg(['mean', 'std'])
print("\nSummary Statistics (raw):")
print(summary_max_cc)

# Same statistics for the average component counts.
summary_avg_cc = df_pivot_avg_cc.groupby('segmentation_type').agg(['mean', 'std'])
print("\nSummary Statistics (raw):")
print(summary_avg_cc)

# Long format of the max-component summary: one row per (segmentation_type, label)
# with 'mean' and 'std' columns, plus a formatted "mean ± std" string.
summary_long = (
    summary_max_cc
    .stack(level=0)
    .reset_index()
    .rename(columns={'label': 'Anatomical_Class'})
)
summary_long['MeanStd'] = summary_long.apply(
    lambda stats_row: format_mean_std(stats_row['mean'], stats_row['std']),
    axis=1,
)
print("\nFormatted Summary (Mean ± Std):")
print(summary_long[['segmentation_type', 'Anatomical_Class', 'MeanStd']])


In [None]:
# Mean and std of the LV, MYO and RV columns of the Dice table, per segmentation_type.
summary_dice = (
    df_dice_final
    .groupby('segmentation_type')[['LV', 'MYO', 'RV']]
    .agg(['mean', 'std'])
)

# Collapse the two-level columns into flat "<class>_<stat>" names (e.g. "LV_mean").
summary_dice.columns = summary_dice.columns.map('_'.join)

# One "mean ± std" string column per class, both numbers rounded to 3 decimals.
summary_formatted = pd.DataFrame({
    class_name: (
        summary_dice[f"{class_name}_mean"].round(3).astype(str)
        + " ± "
        + summary_dice[f"{class_name}_std"].round(3).astype(str)
    )
    for class_name in ('LV', 'MYO', 'RV')
})
print(summary_formatted)

In [None]:
# Chamfer distances: name the index 'Subject', then melt to long format
# (one row per Subject/variable pair) and pool every value for the overall mean and std.
df_chamfer = chamfer_distances.rename_axis('Subject')
df_melt_chamfer = df_chamfer.reset_index().melt(id_vars='Subject')
chamfer_values = df_melt_chamfer[['value']]
mean_chamfer = chamfer_values.mean()
std_chamfer = chamfer_values.std()

# Same processing for the HF95 distances.
df_hf95 = hf95_distances.rename_axis('Subject')
df_melt_hf95 = df_hf95.reset_index().melt(id_vars='Subject')
hf95_values = df_melt_hf95[['value']]
mean_hf95 = hf95_values.mean()
std_hf95 = hf95_values.std()

In [None]:
# The statistics are one-element Series; pull out their single scalar value.
mean_chamfer_val, std_chamfer_val = mean_chamfer.iloc[0], std_chamfer.iloc[0]
mean_hf95_val, std_hf95_val = mean_hf95.iloc[0], std_hf95.iloc[0]

# Single-row table (row labelled 'Ours') with "mean ± std" strings, two decimals each.
mean_std_template = "{:.2f} ± {:.2f}"
data = {
    'Metric': ['Ours'],
    'Chamfer': [mean_std_template.format(mean_chamfer_val, std_chamfer_val)],
    'HF95': [mean_std_template.format(mean_hf95_val, std_hf95_val)],
}
df_table = pd.DataFrame(data)

# Render the table as LaTeX source and show it.
latex_table = df_table.to_latex(
    index=False,
    caption="Summary of Chamfer and HF95 Distances for New Approach",
    label="tab:distances",
    column_format="lcc",
)
print(latex_table)

In [None]:
# Mean and std of the LV, MYO and RV maximum component counts, per segmentation_type.
summary_max_cc = (
    df_pivot_max_cc
    .groupby('segmentation_type')[['LV', 'MYO', 'RV']]
    .agg(['mean', 'std'])
)

# Flat "<class>_<stat>" column names instead of the two-level header.
summary_max_cc.columns = summary_max_cc.columns.map('_'.join)

# "mean ± std" strings (3-decimal rounding), one column per class, same row index.
formatted_columns = {}
for class_name in ('LV', 'MYO', 'RV'):
    mean_text = summary_max_cc[f"{class_name}_mean"].round(3).astype(str)
    std_text = summary_max_cc[f"{class_name}_std"].round(3).astype(str)
    formatted_columns[class_name] = mean_text + " ± " + std_text
summary_formatted_max_cc = pd.DataFrame(formatted_columns, index=summary_max_cc.index)

summary_formatted_max_cc

In [None]:
# Build the combined Dice / Max. CC summary table and export it as LaTeX.
# Uses `summary_dice` (flattened "<class>_mean" / "<class>_std" columns) and
# `df_pivot_max_cc`, both produced by earlier cells.


def _format_mean_std_columns(stats, classes=('LV', 'MYO', 'RV'), decimals=2):
    """Return a frame of "mean ± std" strings, one column per class.

    Parameters
    ----------
    stats : pd.DataFrame
        Must contain "<class>_mean" and "<class>_std" columns for every class.
    classes : sequence of str
        Column prefixes to format; they become the output column names.
    decimals : int
        Number of digits printed after the decimal point.

    Returns
    -------
    pd.DataFrame
        String columns named after ``classes``, indexed like ``stats``.

    Notes
    -----
    Fixed-point formatting is used instead of ``round(...).astype(str)`` so that
    trailing zeros are kept (0.9 is rendered as "0.90", not "0.9") and every cell
    of the table shows the same number of decimals.
    """
    fixed_point = f"{{:.{decimals}f}}".format
    return pd.DataFrame(
        {
            name: stats[f"{name}_mean"].map(fixed_point)
            + " ± "
            + stats[f"{name}_std"].map(fixed_point)
            for name in classes
        },
        index=stats.index,
    )


# Display names for the segmentation_type values; values missing from the mapping are
# kept unchanged. (presumably these keys are the values stored in the tables -- verify)
rename_dict = {'pred_base': 'Baseline', 'pred_new': 'Ours', 'gt': 'CNN reference'}

# Dice: "mean ± std" with exactly 2 decimals, rows renamed for display.
summary_formatted = _format_mean_std_columns(summary_dice).rename(index=rename_dict)

# Max. CC: mean/std per segmentation_type, flattened to "<class>_<stat>" columns,
# then formatted and renamed the same way as the Dice table.
summary_max_cc = df_pivot_max_cc.groupby('segmentation_type').agg(['mean', 'std'])
summary_max_cc.columns = ['_'.join(col) for col in summary_max_cc.columns]
summary_max_cc_formatted = _format_mean_std_columns(summary_max_cc).rename(index=rename_dict)

# Prefix the columns so the two metrics stay distinguishable once side by side.
df_dice = summary_formatted.rename(columns={'LV': 'Dice LV', 'MYO': 'Dice MYO', 'RV': 'Dice RV'})
df_max_cc = summary_max_cc_formatted.rename(columns={'LV': 'MaxCC LV', 'MYO': 'MaxCC MYO', 'RV': 'MaxCC RV'})

# Combine the two DataFrames on their index (segmentation_type)
combined = pd.concat([df_dice, df_max_cc], axis=1)
print(combined.head())

# Turn "<metric> <class>" names into a two-level header:
# e.g. "Dice LV" -> ("Dice", "LV") and "MaxCC LV" -> ("Max. CC", "LV").
top_level_names = {'Dice': 'Dice', 'MaxCC': 'Max. CC'}
new_columns = [
    (top_level_names[metric], class_name)
    for metric, class_name in (col.split(' ', 1) for col in combined.columns)
]
combined.columns = pd.MultiIndex.from_tuples(new_columns)

# Put the Dice group first, followed by the Max. CC group.
combined = combined[['Dice', 'Max. CC']]

# Now, output the combined DataFrame as a LaTeX table.
latex_combined = combined.to_latex(multicolumn=True,
                                   multirow=True,
                                   caption="Summary of Dice and Maximum CC by Segmentation Type and Anatomical Class",
                                   label="tab:metrics_combined",
                                   column_format="lccc|ccc")  # adjust formatting as desired

print(latex_combined)
