In [5]:
import os
import pandas as pd
import numpy as np
import pydicom as dcm
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import trimesh

# Define patient file directory
directory = r'C:\Users\Hiran Gunawardana\Desktop\KYS Project\NEW'

# Import organ models data
folder_path = r'C:\Users\Hiran Gunawardana\Desktop\KYS Project\Organ Models_Dice'  

# Create a dictionary to store the model organs dataframes
model_organs = {}
for file in os.scandir(folder_path):
    if file.name.endswith('.csv') and file.is_file():
        file_name = os.path.splitext(file.name)[0]  # Extract the file name without extension
        df = pd.read_csv(file.path)  # Read CSV into dataframe
        model_organs[file_name] = df  # Store dataframe in the dictionary with file name as key
        
result_dict = {}

for foldername in os.listdir(directory):
    folderpath = os.path.join(directory, foldername)
    if os.path.isdir(folderpath):
        for filename in os.listdir(folderpath):
            filepath = os.path.join(folderpath, filename)
            if os.path.isfile(filepath) and filepath.endswith('.dcm'):
                dcm_file = dcm.dcmread(filepath)
                # Proceeding only if the DICOM file is from the RTSTRUCT modality
                if dcm_file.Modality == "RTSTRUCT":
                    contours = dcm_file.ROIContourSequence
                    organs = dcm_file.StructureSetROISequence

                    # Extract the organ data (coordinates of all the slices) from the RTSTRUCT file
                    
                    organ_data={}
                    for j in range(len(contours)):
                        contour_data = np.array([])
                        #for loop to get the cordinate3 from all the slices of of the organ
                        for i in range(len(contours[j].ContourSequence)):
                            contour_data=np.concatenate((contour_data,(np.array(contours[j].ContourSequence[i].ContourData))))
                            #getting all the cordinate data to a data frame. with x,y,z cordinates
                            organ_data[f"organ{j}"]=pd.DataFrame(contour_data.reshape(int(len(contour_data)/3),3),columns=("X","Y","Z"))
                    
                    organs_in_file = " ".join(organs[i].ROIName for i in range(len(contours)))
                    body_num = next((i for i, organ in enumerate(organs) if organ.ROIName == "Body" or "BODY"), None)
                    imported_organs = list(model_organs.keys())

                    #Modify organ_data to contain percentage x and y values
                    body_x_max = organ_data[f"organ{body_num}"]["X"].max()
                    body_x_min = organ_data[f"organ{body_num}"]["X"].min()
                    body_y_max = organ_data[f"organ{body_num}"]["Y"].max()
                    body_y_min = organ_data[f"organ{body_num}"]["Y"].min()
                    
                    body_x_axis = (body_x_max + body_x_min)/2
                    body_y_axis = (body_y_max + body_y_min)/2
                    
                    for i in range(len(organ_data)):
                        organ_data[f"organ{i}"]["X"] = (organ_data[f"organ{i}"]["X"]-body_x_axis)*100/body_x_max
                        organ_data[f"organ{i}"]["Y"] = (organ_data[f"organ{i}"]["Y"]-body_y_axis)*100/body_y_max
                        
                        #to remap the Z axis data
                        organ_z_max = organ_data[f"organ{i}"]["Z"].max()
                        organ_z_min = organ_data[f"organ{i}"]["Z"].min()

                        organ_data[f"organ{i}"]["Z"] = 1 - (organ_data[f"organ{i}"]["Z"] - organ_z_min) / (organ_z_max - organ_z_min)
                    
                    def dice_coefficient(data1, data2):
                        z_coords = np.unique(data1[:, 2])  # Assuming Z coordinate is in the third column

                        data1_contour_areas = 0.5 * np.abs(np.dot(data1[:, 0], np.roll(data1[:, 1], 1)) - np.dot(data1[:, 1], np.roll(data1[:, 0], 1)))
                        data2_contour_areas = 0.5 * np.abs(np.dot(data2[:, 0], np.roll(data2[:, 1], 1)) - np.dot(data2[:, 1], np.roll(data2[:, 0], 1)))

                        slice_thickness = np.abs(data1[:, 2].max() - data1[:, 2].min()) / (len(z_coords) - 1)

                        data1_volume = (np.sum(data1_contour_areas) * slice_thickness) / 1000000
                        data2_volume = (np.sum(data2_contour_areas) * slice_thickness) / 1000000

                        intersection_volume = np.sum(np.minimum(data1_contour_areas, data2_contour_areas)) * slice_thickness / 1000000

                        dice_coefficient = 2 * intersection_volume / (data1_volume + data2_volume)

                        return dice_coefficient
                    
                    results = pd.DataFrame(columns=['organ_index', 'model_organ', 'dice_coefficient'])
    
                    for i in range(len(contours)):
                        for model_organ in model_organs.keys():
                            data1 = model_organs[f"{model_organ}"][['X', 'Y', 'Z']].values
                            data2 = organ_data[f"organ{i}"].values


                            coeff = dice_coefficient(data1, data2)

                            # Create a temporary DataFrame for current iteration
                            temp_df = pd.DataFrame({'organ_index': [i], 'model_organ': [model_organ], 'dice_coefficient': [f"{coeff:.4f}"]})
                            results = pd.concat([results, temp_df], ignore_index=True)
                                

                    def calculate_similarity(model_data, patient_data):
                        model_points = model_data[['X', 'Y', 'Z']].values
                        patient_points = patient_data[['X', 'Y', 'Z']].values

                        min_length = min(model_points.shape[0], patient_points.shape[0])

                        # Calculate the Euclidean distance between corresponding points
                        distances = np.linalg.norm(model_points[:min_length] - patient_points[:min_length], axis=1)

                        # Compute the similarity score as the average distance
                        similarity = 1 / (1 + np.mean(distances))

                        return similarity
                    
                    # Define a function to get the name of the organ with highest dice coefficient
                   
                    def get_matched_organ_name(i):
                        organ_results = results.loc[results['organ_index'] == i].copy()
                        organ_results.loc[:, 'dice_coefficient'] = organ_results['dice_coefficient'].astype(float)
                        organ_results = organ_results.nlargest(3, 'dice_coefficient')

                        if not organ_results.empty:
                            matched_organs = organ_results['model_organ'].tolist()
                            best_match = None
                            best_similarity = -1

                            for organ_name in matched_organs:
                                model_data = model_organs[organ_name]  # Retrieve the model organ data
                                patient_data = organ_data[f"organ{i}"]  # Retrieve the patient organ data

                                # Compare the X, Y, and Z point distributions
                                similarity = calculate_similarity(model_data, patient_data)

                                if similarity > best_similarity:
                                    best_similarity = similarity
                                    best_match = organ_name

                            return best_match
                        else:
                            return "--"
                        
                    # Create a dataframe with the algorithm results for the current RTSTRUCT file
                    algorithm = pd.DataFrame({

                    "organ_number": range(len(organs)),
                    "organ_name": [organ.ROIName for organ in organs],
                    "organ_identified": [get_matched_organ_name(i) for i in range(len(organs))],

                    })

                    algorithm["test"] = algorithm.apply(lambda row: "Pass" if ((row["organ_identified"] in row["organ_name"]) or (row["organ_name"] in row["organ_identified"]) ) else "Fail", axis=1)

                    
                    if foldername not in result_dict:
                        result_dict[foldername] = []
                    result_dict[foldername].append(algorithm)
                    
                    def results():
                        for key, value in result_dict.items():
                            print(f"PATIENT {key}:")
                            print(value)
                            print()
                            
                    def result_stats():
                        result_stats = result_dict["1"][0][["organ_name", "test"]]

                        for i in range(2, len(result_dict) + 1):
                            result_stats = pd.concat([result_stats, result_dict[str(i)][0][["organ_name", "test"]]], ignore_index=True)

                        organ_value_counts = result_stats["organ_name"].value_counts()
                        organ_pass_counts = result_stats[result_stats["test"]=="Pass"]["organ_name"].value_counts()
                        pass_percentage = organ_pass_counts*100/organ_value_counts
                        
                        return pass_percentage.sort_values(ascending=False)

In [6]:
results()

PATIENT 1:
[   organ_number  organ_name organ_identified  test
0             0        Body             Body  Pass
1             1    Breast_R         Breast_R  Pass
2             2       Heart            Heart  Pass
3             3      Lung_L           Lung_L  Pass
4             4      Lung_R           Lung_R  Pass
5             5  PTV_InMarg              PTV  Pass]

PATIENT 10:
[   organ_number  organ_name organ_identified  test
0             0        Body           Lung_R  Fail
1             1    Breast_R         Breast_R  Pass
2             2       Heart            Heart  Pass
3             3      Lung_L           Lung_L  Pass
4             4      Lung_R           Lung_R  Pass
5             5  PTV_InMarg              PTV  Pass]

PATIENT 100:
[   organ_number  organ_name organ_identified  test
0             0        Body           Lung_R  Fail
1             1    Breast_R         Breast_R  Pass
2             2       Heart            Heart  Pass
3             3      Lung_L           L

5             5  PTV_InMarg              PTV  Pass]

PATIENT 83:
[   organ_number  organ_name organ_identified  test
0             0    Breast_R           Lung_L  Fail
1             1       Heart           Lung_R  Fail
2             2      Lung_L           Lung_R  Fail
3             3      Lung_R           Lung_R  Pass
4             4        Body             Body  Pass
5             5  PTV_InMarg           Lung_R  Fail]

PATIENT 84:
[   organ_number  organ_name organ_identified  test
0             0        Body           Lung_R  Fail
1             1    Breast_R           Lung_R  Fail
2             2       Heart            Heart  Pass
3             3      Lung_L           Lung_L  Pass
4             4      Lung_R           Lung_R  Pass
5             5  PTV_InMarg              PTV  Pass]

PATIENT 85:
[   organ_number  organ_name organ_identified  test
0             0        Body           Lung_L  Fail
1             1    Breast_R           Lung_R  Fail
2             2       Heart          

In [7]:
result_stats()

Lung_R        94.000000
PTV_InMarg    91.000000
Heart         85.000000
Lung_L        82.000000
Breast_R      51.000000
Body           9.183673
BODY                NaN
Name: organ_name, dtype: float64