In [1]:
import os
import numpy as np
import pandas as pd
import pydicom
import shutil

In [4]:
# --- Input / output locations -------------------------------------------------
# Root folder that is joined with a patient id to locate that patient's DICOM files.
dicom_dir_path = '/Users/arjunmoorthy/Desktop/t2axialimages'
# Spreadsheet with one row per targeted biopsy entry.
targeted_bx_path = '/Users/arjunmoorthy/Desktop/Research_Capstone/targeted_biopsy_patients.xlsx'
# Parent folder under which the per-label output folders are created.
output_dir_base_path = '/Users/arjunmoorthy/Desktop/Research_Capstone/Image Data'

# --- Per-label output folders --------------------------------------------------
cancer_output_dir = os.path.join(output_dir_base_path, 'cancer')
non_cancer_output_dir = os.path.join(output_dir_base_path, 'non_cancer')

# Create both label folders up front; exist_ok makes re-running this cell harmless.
for _label_dir in (cancer_output_dir, non_cancer_output_dir):
    os.makedirs(_label_dir, exist_ok=True)

# --- Biopsy table ----------------------------------------------------------------
df = pd.read_excel(targeted_bx_path)

In [8]:
# Function to find and copy the DICOM slice closest to the given z-coordinates
def find_and_copy_closest_slices(patient_id, z_coords, output_dir):
    """Copy the single DICOM slice nearest to any of the target z-coordinates.

    Every ``.dcm`` file below ``dicom_dir_path/<patient_id>`` is inspected and
    the third component of its ``ImagePositionPatient`` is compared with each
    value in ``z_coords``. The file with the smallest absolute difference to
    *any* target wins and is copied to ``<output_dir>/<patient_id>/``.

    Despite the plural name, at most ONE file is copied per call: the best
    match over all targets, not one match per target.

    NOTE(review): only the z component of ImagePositionPatient is compared;
    for oblique acquisitions this may differ from the position along the
    slice normal -- confirm this is acceptable for the data set.

    Args:
        patient_id: Name of the patient's sub-directory under ``dicom_dir_path``;
            also used as the name of the destination sub-directory.
        z_coords: Iterable of target z-coordinates. Missing values (NaN/None)
            are ignored.
        output_dir: Directory under which the per-patient folder is created.

    Returns:
        The path of the copied file as returned by ``shutil.copy``, or ``None``
        when nothing was copied (no usable target, no patient directory, or no
        file carrying a usable ``ImagePositionPatient``).
    """
    patient_dicom_dir = os.path.join(dicom_dir_path, patient_id)

    # A NaN target can never satisfy "diff < min_diff", so it would silently
    # contribute nothing; drop missing values explicitly and report when no
    # usable target is left instead of walking the directory tree for nothing.
    targets = [z for z in z_coords if not pd.isna(z)]
    if not targets:
        print(f"Skipped: no valid z-coordinates for {patient_id}")
        return None

    closest_file_path = None
    min_diff = np.inf

    # Walk through all subdirectories
    for root, _dirs, files in os.walk(patient_dicom_dir):
        for file in files:
            # Accept ".dcm" in any letter case (e.g. ".DCM").
            if not file.lower().endswith(".dcm"):
                continue

            file_path = os.path.join(root, file)
            # Only header information is needed, so skip the pixel data;
            # force=True keeps files without a DICOM preamble readable.
            ds = pydicom.dcmread(file_path, stop_before_pixels=True, force=True)

            position = ds.get('ImagePositionPatient')
            # Ignore files whose position is absent, empty, or too short.
            if position is None or len(position) < 3:
                continue

            dicom_z_coord = position[2]
            diff = min(abs(dicom_z_coord - z_coord) for z_coord in targets)

            # Strict "<" keeps the first file encountered on ties.
            if diff < min_diff:
                min_diff = diff
                closest_file_path = file_path

    if closest_file_path is None:
        # Covers a missing patient directory as well as files lacking geometry.
        print(f"No matching slice found for {patient_id} in {patient_dicom_dir}")
        return None

    # Copy the closest slice to the output directory
    output_patient_dir = os.path.join(output_dir, patient_id)
    os.makedirs(output_patient_dir, exist_ok=True)
    copied_path = shutil.copy(closest_file_path, output_patient_dir)
    print(f"Copied: {closest_file_path} to {output_patient_dir}")
    return copied_path

In [9]:
# Process the biopsy table one row at a time: pick the label folder from the
# primary Gleason value, then copy the best-matching slice for that row.
for index, row in df.iterrows():
    patient_id = str(row['Patient Number'])
    gleason_score = row['Primary Gleason']
    # Needle tip and base z-positions; both columns are expected to be present in the sheet.
    z_coords = [
        row['Bx Tip Z (MRI Coord)'],
        row['Bx Base Z (MRI Coord)'],
    ]

    # A primary Gleason of 3 or more goes to "cancer"; every other value
    # (including NaN, for which the comparison is False) goes to "non_cancer".
    output_dir = cancer_output_dir if gleason_score >= 3 else non_cancer_output_dir

    find_and_copy_closest_slices(patient_id, z_coords, output_dir)

Copied: /Users/arjunmoorthy/Desktop/t2axialimages/Prostate-MRI-US-Biopsy-0001/06-28-2009-NA-MRI PROSTATE W WO CONTRAST-51743/11.000000-t2spcrstaxial oblProstate-90221/1-23.dcm to /Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/non_cancer/Prostate-MRI-US-Biopsy-0001
Copied: /Users/arjunmoorthy/Desktop/t2axialimages/Prostate-MRI-US-Biopsy-0001/06-28-2009-NA-MRI PROSTATE W WO CONTRAST-51743/11.000000-t2spcrstaxial oblProstate-90221/1-16.dcm to /Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/non_cancer/Prostate-MRI-US-Biopsy-0001
Copied: /Users/arjunmoorthy/Desktop/t2axialimages/Prostate-MRI-US-Biopsy-0001/06-28-2009-NA-MRI PROSTATE W WO CONTRAST-51743/11.000000-t2spcrstaxial oblProstate-90221/1-13.dcm to /Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/non_cancer/Prostate-MRI-US-Biopsy-0001
Copied: /Users/arjunmoorthy/Desktop/t2axialimages/Prostate-MRI-US-Biopsy-0001/06-28-2009-NA-MRI PROSTATE W WO CONTRAST-51743/11.000000-t2spcrstaxial oblProstate-90221/1-14.dcm