In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import SimpleITK as sitk

input_dir = Path("/gpfs/work1/0/prjs0971/glioseg/data/ASNR-MICCAI-BraTS2023-GLI-Challenge-TrainingData/original")

labels_mapping = {
    1: "ED", 
    2: "NEC", 
    3: "ET"
}

final_results = []


for patient in input_dir.iterdir():

    simple_relabel_im_dir = patient.joinpath(f"{patient.name}-seg_relabel").with_suffix(".nii.gz")
    nec_relabel_im_dir = patient.joinpath(f"{patient.name}-seg_nec_relabel").with_suffix(".nii.gz")
    
    simple_relabel_im = sitk.ReadImage(str(simple_relabel_im_dir))
    simple_relabel_arr = sitk.GetArrayFromImage(simple_relabel_im)

    nec_relabel_im = sitk.ReadImage(str(nec_relabel_im_dir))
    nec_relabel_arr = sitk.GetArrayFromImage(nec_relabel_im)

    results_per_patient = {}
    results_per_patient["Patient ID"] = patient.name

    for label, label_name in labels_mapping.items():

        simple_count = np.sum(simple_relabel_arr == label)
        nec_count = np.sum(nec_relabel_arr == label)

        results_per_patient[f"{label_name}_before_relabel"] = simple_count
        results_per_patient[f"{label_name}_after_relabel"] = nec_count
    
    final_results.append(results_per_patient)

final_results_df = pd.DataFrame.from_dict(final_results)
final_results_df.to_csv(Path("/home/gesteban/glioseg/glioseg/extras/results/relabel_necrosis_comparison_brats2023.csv"), index=False)

In [2]:
import seaborn as sns
import pandas as pd
from pathlib import Path

# Now we read the pdf and check the differences 

final_results_df = pd.read_csv(Path("/home/gesteban/glioseg/glioseg/extras/results/relabel_necrosis_comparison_brats2023.csv"))
final_results_df["ED_difference"] = final_results_df["ED_before_relabel"] - final_results_df["ED_after_relabel"]
final_results_df["NEC_difference"] = final_results_df["NEC_before_relabel"] - final_results_df["NEC_after_relabel"]
final_results_df["ET_difference"] = final_results_df["ET_before_relabel"] - final_results_df["ET_after_relabel"]    

final_results_df.to_csv(Path("/home/gesteban/glioseg/glioseg/extras/results/relabel_necrosis_comparison_brats2023_with_differences.csv"), index=False)


# Print mean, min, and max difference for each label
for label_name in ["ED","NEC", "ET"]:
    mean_diff = final_results_df[f"{label_name}_difference"].mean()
    min_diff = final_results_df[f"{label_name}_difference"].min()
    max_diff = final_results_df[f"{label_name}_difference"].max()
    print(f"{label_name} - Mean difference: {mean_diff}, Min difference: {min_diff}, Max difference: {max_diff}")
    print(f"Max difference found in patient {final_results_df.loc[final_results_df[f'{label_name}_difference'].idxmax(), 'Patient ID']}")
    print(f"Min difference found in patient {final_results_df.loc[final_results_df[f'{label_name}_difference'].idxmin(), 'Patient ID']}")


ED - Mean difference: -3995.224620303757, Min difference: -189023, Max difference: 0
Max difference found in patient BraTS-GLI-01297-000
Min difference found in patient BraTS-GLI-01533-000
NEC - Mean difference: 3995.0263788968823, Min difference: -97, Max difference: 189023
Max difference found in patient BraTS-GLI-01533-000
Min difference found in patient BraTS-GLI-01042-000
ET - Mean difference: 0.07513988808952837, Min difference: 0, Max difference: 40
Max difference found in patient BraTS-GLI-01173-000
Min difference found in patient BraTS-GLI-01297-000


In [3]:
def extract_label(mask: sitk.Image, labels: int | list[int]) -> sitk.Image:
    """
    Binarizes a segmentation mask by thresholding specific labels.

    Args:
        mask (sitk.Image): The input segmentation mask as a SimpleITK image.
        labels (int | list[int]): The label value(s) to be binarized.
            - If a single integer, voxels with this value will be set to 1, and all others to 0.
            - If a list, voxels with any of these values will be set to 1, and all others to 0.

    Returns:
        sitk.Image: A binarized mask where the specified label(s) are set to 1, and all other values are 0.
    """
    if isinstance(labels, int):
        return sitk.BinaryThreshold(mask, lowerThreshold=labels, upperThreshold=labels)
    binary_mask = sitk.Image(mask.GetSize(), sitk.sitkUInt8)
    binary_mask.CopyInformation(mask)
    for label in labels:
        binary_mask = binary_mask | sitk.BinaryThreshold(
            mask, lowerThreshold=label, upperThreshold=label
        )
    return binary_mask

In [4]:
import numpy as np
import pandas as pd
from pathlib import Path
import SimpleITK as sitk

input_dir = Path("/gpfs/work1/0/prjs0971/glioseg/data/ASNR-MICCAI-BraTS2023-GLI-Challenge-TrainingData/original")

labels_mapping = {
    1: "ED", 
    2: "NEC", 
    3: "ET"
}

final_results = {"Patient ID": [] , "Num_NEC_Components": []}


for patient in input_dir.iterdir():

    simple_relabel_im_dir = patient.joinpath(f"{patient.name}-seg_relabel").with_suffix(".nii.gz")
    simple_relabel_im = sitk.ReadImage(str(simple_relabel_im_dir), sitk.sitkUInt8)

    simple_relabel_im_nec = extract_label(simple_relabel_im, labels=2)

    # Find connected components
    cc_filter = sitk.ConnectedComponentImageFilter()
    connected_components = cc_filter.Execute(simple_relabel_im_nec)
    num_components = cc_filter.GetObjectCount()
    final_results["Patient ID"].append(patient.name)
    final_results["Num_NEC_Components"].append(num_components)

final_results_df = pd.DataFrame.from_dict(final_results)
final_results_df.to_csv(Path("/home/gesteban/glioseg/glioseg/extras/results/number_of_nec_components_brats2023.csv"), index=False)


In [5]:
# Count how many cases have more than one component
num_multiple_components = np.sum(final_results_df["Num_NEC_Components"] > 10)
print(f"Number of cases with more than one NEC component: {num_multiple_components} out of {len(final_results_df)}")

Number of cases with more than one NEC component: 654 out of 1251


In [6]:
# Now we analyze the degree of change between the new data from BraTS2023 and after applying the NEC relabeling 

import numpy as np
import pandas as pd
from pathlib import Path
import SimpleITK as sitk

input_dir = Path("/gpfs/work1/0/prjs0971/glioseg/data/ASNR-MICCAI-BraTS2023-GLI-Challenge-TrainingData/original")

labels_mapping = {
    1: "ED", 
    2: "NEC", 
    3: "ET"
}

final_results = {"Patient ID": [] , "Num_NEC_Components": []}

mapping_2023_2021_2020_df = pd.read_csv("/home/gesteban/glioseg/glioseg/extras/results/brats2023_to_brats2021_to_brats2020_mapping.csv")
ids_2023_old_data = mapping_2023_2021_2020_df["BraTS2023"].tolist()
ids_2023_new_data = [] 

for patient in input_dir.iterdir():

    patient_id = patient.name
    if patient_id not in ids_2023_old_data:
        ids_2023_new_data.append(patient_id)



In [7]:
print(len(ids_2023_new_data))  # Print the number of new IDs detected

886


In [8]:
relabel_necrosis_2023_data = pd.read_csv("/home/gesteban/glioseg/glioseg/extras/results/relabel_necrosis_comparison_brats2023.csv")

relabel_necrosis_2023_data_new_cases = relabel_necrosis_2023_data[relabel_necrosis_2023_data["Patient ID"].isin(ids_2023_new_data)].copy()
relabel_necrosis_2023_data_new_cases["ED_difference"] = relabel_necrosis_2023_data_new_cases["ED_before_relabel"] - relabel_necrosis_2023_data_new_cases["ED_after_relabel"]
relabel_necrosis_2023_data_new_cases["NEC_difference"] = relabel_necrosis_2023_data_new_cases["NEC_before_relabel"] - relabel_necrosis_2023_data_new_cases["NEC_after_relabel"]
relabel_necrosis_2023_data_new_cases["ET_difference"] = relabel_necrosis_2023_data_new_cases["ET_before_relabel"] - relabel_necrosis_2023_data_new_cases["ET_after_relabel"]

In [9]:
# Mean, min, max differences for new cases only
for label_name in ["ED","NEC", "ET"]:
    
    mean_diff = relabel_necrosis_2023_data_new_cases[f"{label_name}_difference"].mean()
    min_diff = relabel_necrosis_2023_data_new_cases[f"{label_name}_difference"].min()
    max_diff = relabel_necrosis_2023_data_new_cases[f"{label_name}_difference"].max()
    print(f"{label_name} (new cases only) - Mean difference: {mean_diff}, Min difference: {min_diff}, Max difference: {max_diff}")
    print(f"Max difference found in patient {relabel_necrosis_2023_data_new_cases.loc[relabel_necrosis_2023_data_new_cases[f'{label_name}_difference'].idxmax(), 'Patient ID']}")
    print(f"Min difference found in patient {relabel_necrosis_2023_data_new_cases.loc[relabel_necrosis_2023_data_new_cases[f'{label_name}_difference'].idxmin(), 'Patient ID']}")

ED (new cases only) - Mean difference: -1007.8340857787811, Min difference: -171323, Max difference: 0
Max difference found in patient BraTS-GLI-00369-000
Min difference found in patient BraTS-GLI-00012-000
NEC (new cases only) - Mean difference: 1007.5902934537246, Min difference: -97, Max difference: 171323
Max difference found in patient BraTS-GLI-00012-000
Min difference found in patient BraTS-GLI-01042-000
ET (new cases only) - Mean difference: 0.06997742663656885, Min difference: 0, Max difference: 40
Max difference found in patient BraTS-GLI-01173-000
Min difference found in patient BraTS-GLI-00369-000


In [14]:
print(relabel_necrosis_2023_data_new_cases.keys())
# Find how many cases have a difference greater than 10 percentage of the total volume for each label
for label_name in ["ED","NEC", "ET"]:
    count_greater_than_10_percent = 0
    for index, row in relabel_necrosis_2023_data_new_cases.iterrows():
        before_relabel = row[f"{label_name}_before_relabel"]
        difference = abs(row[f"{label_name}_difference"])
        if before_relabel > 0:
            percentage_difference = (difference / before_relabel) * 100
            if percentage_difference > 10:
                count_greater_than_10_percent += 1
    print(f"Number of new cases with more than 10% difference with respect to volume before relabel in {label_name}: {count_greater_than_10_percent} out of {len(relabel_necrosis_2023_data_new_cases)}")

Index(['Patient ID', 'ED_before_relabel', 'ED_after_relabel',
       'NEC_before_relabel', 'NEC_after_relabel', 'ET_before_relabel',
       'ET_after_relabel', 'ED_difference', 'NEC_difference', 'ET_difference'],
      dtype='object')
Number of new cases with more than 10% difference with respect to volume before relabel in ED: 28 out of 886
Number of new cases with more than 10% difference with respect to volume before relabel in NEC: 60 out of 886
Number of new cases with more than 10% difference with respect to volume before relabel in ET: 2 out of 886
