# Segmentation Results

For each patient, the segmented pixels are counted and summed per Gleason Grade Group (GGG). The GGG with the largest pixel count is then selected for that patient.

In [None]:
import os
import nibabel as nib
import numpy as np

# Root directory of the segmentation results. process_patient() treats every
# entry in it as a patient ID and loads
# "<data_dir>/<patient_id>/<patient_id>_<patient_id>.nii.gz".
data_dir = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Voxel_Results_Cropped"

def process_patient():
    """Pick the dominant grade-group label of every patient's segmentation mask.

    Every entry of ``data_dir`` is treated as a patient ID whose mask is stored
    at ``<data_dir>/<patient_id>/<patient_id>_<patient_id>.nii.gz``. For each
    mask the number of elements equal to each label value (1-5) is counted over
    the whole volume, and the label with the highest count is selected.

    Progress and per-label counts are printed along the way.

    Returns:
        dict: ``{patient_id: label_name}`` where ``label_name`` is one of
        ``"gg1"`` ... ``"gg5"``.

    Raises:
        Whatever ``os.listdir`` / ``nib.load`` raise when the directory or a
        mask file is missing or unreadable.
    """
    # Label value stored in the mask -> label name. Identical for every
    # patient, so it is built once instead of once per loop iteration.
    label_names = {
        1: "gg1",
        2: "gg2",
        3: "gg3",
        4: "gg4",
        5: "gg5",
    }
    result_dict = {}

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # printed report and the dict ordering reproducible between runs.
    for patient_id in sorted(os.listdir(data_dir)):

        # Load the mask file
        mask_file = os.path.join(
            data_dir, patient_id, f"{patient_id}_{patient_id}.nii.gz"
        )
        mask_data = nib.load(mask_file).get_fdata()

        # Print the shape of the mask file
        print(f"\nPatient {patient_id} - Mask Shape: {mask_data.shape}")

        # Count each label over the whole volume in one vectorised pass.
        # Summing slice by slice along axis 2 gives the same totals but runs a
        # Python-level loop and fails for masks with fewer than 3 dimensions.
        total_pixel_counts = {
            label_value: np.count_nonzero(mask_data == label_value)
            for label_value in label_names
        }

        # Print total pixel count for each label across all slices
        print("\nTotal pixel count for each label:")
        for label_value, label_name in label_names.items():
            print(f"{label_name}: {total_pixel_counts[label_value]} pixels")

        # Determine the most occurring label. max() returns the first key with
        # the highest count, so ties go to the lowest label value.
        # NOTE(review): a mask without any labelled element (all counts 0) is
        # therefore reported as "gg1" - confirm that this is intended.
        most_occurred_label_value = max(total_pixel_counts, key=total_pixel_counts.get)
        most_occurred_label_name = label_names[most_occurred_label_value]

        # Print the most occurring label for the patient
        print(f"\nPatient {patient_id} - Most Occurring Label: {most_occurred_label_name}")

        # Store the result in a dictionary
        result_dict[patient_id] = most_occurred_label_name

    print("\nMost Occurring Labels for Each Patient:")
    print(result_dict)
    return result_dict

# Run the per-patient analysis once; the comparison code further down reads
# this {patient_id: label_name} dictionary.
result_dict = process_patient()

The selected GGG is then compared with the original GGG of each patient, and some metrics are calculated.

In [None]:
# Lookup from a value of the 'surgery GS' column to the label name used in
# result_dict. The keys have mixed types ("7b" is a string, the others are
# integers), and both 9 and 10 map to "gg5".
gg_mapping = {6: "gg1", 7: "gg2", "7b": "gg3", 8: "gg4", 9: "gg5", 10: "gg5"}

import pandas as pd
file_path = "/local_ssd/practical_wise24/prostate_cancer/ProstateData/BREST GS.xlsx"

# Read the Excel file into a DataFrame
# (the code below uses its 'ID' and 'surgery GS' columns)
df = pd.read_excel(file_path)

# Convert a patient ID into the key format used by result_dict
def map_patient_id(patient_id):
    """Return *patient_id* as an integer string zero-padded to at least 3 characters.

    The value is first converted with ``int()``, so integer-valued floats and
    numeric strings are accepted; longer numbers are returned unpadded.
    """
    as_text = str(int(patient_id))
    return as_text.zfill(3)

# Map patient IDs in the DataFrame to result_dict format
# (written to a new column; the original 'ID' column is left unchanged)
df['mapped_patient_id'] = df['ID'].apply(map_patient_id)

# Translate a surgery GS value into its gg label via gg_mapping.
# Despite the function name, the lookup goes from GS value to gg label.
def map_gg_to_gs(gg_label):
    """Return the gg label for a surgery GS value, or the value itself if unmapped.

    Args:
        gg_label: A raw value from the 'surgery GS' column (the parameter name
            is kept for compatibility even though it holds a GS value).

    Returns:
        ``gg_mapping[gg_label]`` when the value is a key of ``gg_mapping``;
        otherwise the input is passed through unchanged.
    """
    if gg_label in gg_mapping:
        return gg_mapping[gg_label]
    return gg_label

# Map the surgery GS column in the DataFrame using gg_mapping
df['mapped_surgery_GS'] = df['surgery GS'].apply(map_gg_to_gs)

# Filter DataFrame to only include patients present in result_dict.
# .copy() makes df_filtered an independent frame, so adding the 'GS_match'
# column below neither raises SettingWithCopyWarning nor depends on whether
# pandas returned a view or a copy of df.
df_filtered = df[df['mapped_patient_id'].isin(list(result_dict))].copy()

# Compare the surgery GS labels with the predicted labels for the filtered
# patients. Looking the predictions up with Series.map and comparing the two
# columns element-wise gives the same booleans as a row-wise apply, but also
# works when df_filtered is empty and avoids a Python call per row.
predicted_labels = df_filtered['mapped_patient_id'].map(result_dict)
df_filtered['GS_match'] = df_filtered['mapped_surgery_GS'] == predicted_labels

# Per-class accuracy: for every gg label, the share of patients with that
# surgery label whose prediction matched (GS_match is True).
class_accuracy = {}
class_counts = {}
# dict.fromkeys() keeps the first-seen order while visiting each label once
# ("gg5" appears twice among the gg_mapping values).
for gg_label in dict.fromkeys(gg_mapping.values()):
    is_class = df_filtered['mapped_surgery_GS'] == gg_label
    n_total = int(is_class.sum())
    n_correct = int((is_class & (df_filtered['GS_match'])).sum())

    class_counts[gg_label] = (n_correct, n_total)

    # Labels without any patient get no accuracy entry (avoids dividing by zero)
    if n_total > 0:
        class_accuracy[gg_label] = n_correct / n_total
#########################################
# Tally how often each GG label was predicted; every known label starts at 0
gg_count = dict.fromkeys(gg_mapping.values(), 0)

# Walk over the predicted labels in result_dict, ignoring unknown labels
for predicted_label in result_dict.values():
    if predicted_label not in gg_count:
        continue
    gg_count[predicted_label] = gg_count[predicted_label] + 1

# Print the count of each GG label
print("GG Label Count in Our Predictions:")
for gg_label in gg_count:
    print(f"{gg_label}: {gg_count[gg_label]}")
#########################################

# Report, per class, how many cases were predicted correctly out of all cases
print("Correct Case Count / Total Case Count per Class:")
for gg_label, (correct_count, total_count) in class_counts.items():
    print(f"{gg_label}: {correct_count} / {total_count}")

#########################################

# Report the per-class accuracy as a percentage with two decimals
print("Class-Based Accuracy:")
for gg_label in class_accuracy:
    accuracy = class_accuracy[gg_label]
    print(f"{gg_label}: {accuracy * 100:.2f}%")
#########################################