In [None]:
import numpy as np

# crop ct
# 3d version
def _crop_ct_3D(kidney_center, crop, ct_data):
    center_x, center_y, center_z = kidney_center
    crop_x, crop_y, crop_z = crop
    max_x, max_y, max_z = ct_data.shape
    start_x, start_y, start_z = center_x - crop_x, center_y - crop_y, center_z - crop_z
    end_x, end_y, end_z = center_x + crop_x, center_y + crop_y, center_z + crop_z
    
    _start_x, _start_y, _start_z = max(0, start_x), max(0, start_y), max(0, start_z)
    _end_x, _end_y, _end_z = min(max_x, end_x), min(max_y, end_y), min(max_z, end_z)
        
    kidney_crop = ct_data[_start_x: _end_x, _start_y: _end_y, _start_z: _end_z]
    kidney_crop = np.pad(kidney_crop,
                  ((_start_x - start_x, end_x - _end_x), (_start_y - start_y, end_y - _end_y), (_start_z - start_z, end_z - _end_z)),
                  'constant',
                   constant_values=-1000)
    
    return kidney_crop

def crop_ct_3D(ct_data, kidney_center, spacing, crop_size_mm):
    """Crop one fixed-size window per kidney out of a 3D volume.

    Each requested size is halved and truncated to an integer, then divided by
    the spacing of its axis and truncated again to get the half-window size in
    voxels. The same half-window is used for both kidneys.

    Args:
        ct_data: 3D array handed unchanged to ``_crop_ct_3D``.
        kidney_center: Mapping with the keys ``'r_kidney_center'`` and
            ``'l_kidney_center'``, each holding an ``(x, y, z)`` center.
        spacing: Three per-axis spacing values ``(x, y, z)``
            (presumably millimetres per voxel -- TODO confirm).
        crop_size_mm: Three full window sizes ``(x, y, z)``.

    Returns:
        dict: ``{'r_kidney_crop': ..., 'l_kidney_crop': ...}`` holding the two
        arrays returned by ``_crop_ct_3D``.
    """
    # Look both centers up front so a missing key fails before any cropping.
    right_center = kidney_center['r_kidney_center']
    left_center = kidney_center['l_kidney_center']

    step_x, step_y, step_z = spacing
    size_x, size_y, size_z = crop_size_mm

    # Two separate truncations: first the half size, then the voxel count.
    half_sizes = [int(size / 2) for size in (size_x, size_y, size_z)]
    half_voxels = tuple(
        int(half / step)
        for half, step in zip(half_sizes, (step_x, step_y, step_z))
    )

    centers_by_key = (
        ('r_kidney_crop', right_center),
        ('l_kidney_crop', left_center),
    )
    return {
        key: _crop_ct_3D(kidney_center=center, crop=half_voxels, ct_data=ct_data)
        for key, center in centers_by_key
    }

In [None]:
"""

This code crops 3D CT volumes around each kidney, using the kidney center coordinates and the CT voxel spacing.
The cropped volumes are saved under a separate `crop_img-KC_v3` directory for each dataset, in one sub-directory per patient, as `{modality}_L_image_data.npy` and `{modality}_R_image_data.npy` for the left and right kidneys, respectively.

The code is based on the following steps:   

1. Load the CT image data and the kidney center coordinates.
2. Calculate the kidney region based on the kidney center coordinates and the CT spacing.
3. Crop the CT image data to the kidney region.
4. Save the cropped images.

"""

import os
import numpy as np
import pandas as pd
from tqdm import tqdm

# Crop both kidneys for every patient of the internal pre-training sheet, for
# each dataset release and each modality that has image data on disk.
datasets = [
    "RenalCLIP1.0",
    "RenalCLIP2.0",
    "RenalCLIP3.0",
    "RenalCLIP3.1",
]

save_root_template = '/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/{dataset}/crop_img-KC_v3'
data_root_template = "/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/{dataset}/nii_npy_aligned"
kc_root_template = '/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/{dataset}/RenalCLIP-KC'

df_template = '/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/{dataset}/ct_statistics.xlsx'
# Read the IDs as text so they are not parsed as numbers (which would drop
# leading zeros and break the string comparison / directory lookup below).
df2 = pd.read_excel('/cpfs01/projects-SSD/cfff-bb5d866c17c2_SSD/public/RenalCLIP/text_files/RenalCLIP_3.1.6.xlsx', sheet_name='internal_pretrain', dtype={'医技号': str})
pid_list = df2['医技号'].astype(str)

MODALITIES = ["N", "A", "V", "D"]

# Full crop window per axis, passed to crop_ct_3D as crop_size_mm.
CROP_SIZE_MM = (140, 140, 160)
# Added to the first coordinate of the left kidney center before cropping.
# NOTE(review): presumably the stored left-kidney coordinate is relative to
# the left half of a 512-wide volume -- confirm against the KC producer.
LEFT_KIDNEY_X_OFFSET = 256

for dataset in datasets:
    save_root = save_root_template.format(dataset=dataset)
    data_root = data_root_template.format(dataset=dataset)
    kc_root = kc_root_template.format(dataset=dataset)

    # Check the KC directory first so a dataset that is going to be skipped
    # cannot fail on its statistics file.
    if not os.path.exists(kc_root):
        print(f"Skipping {dataset} as KC path does not exist.")
        continue

    # PatientID must be text to be comparable with the string `pid`.
    df = pd.read_excel(df_template.format(dataset=dataset), dtype={"PatientID": str})

    for pid in tqdm(pid_list):
        pid = str(pid)
        if not os.path.exists(os.path.join(data_root, pid)):
            continue

        filtered_df = df[df['PatientID'] == pid]
        if filtered_df.empty:
            # Without a statistics row there is no spacing to crop with.
            print(f"Skipping {pid} in {dataset}, no row in CT statistics.")
            continue

        # Coord.npy stores a pickled dict, hence allow_pickle + .item().
        try:
            kidney_center = np.load(os.path.join(kc_root, pid, "Coord.npy"), allow_pickle=True).item()
        except FileNotFoundError:
            print(f"Skipping {pid} in {dataset}, KC file not found.")
            continue
        except Exception as exc:
            # Unreadable / malformed file: report the real reason.
            print(f"Skipping {pid} in {dataset}, KC file could not be read: {exc!r}")
            continue

        if 'l_kidney_center' in kidney_center:
            kidney_center['l_kidney_center'][0] += LEFT_KIDNEY_X_OFFSET

        for modality in MODALITIES:
            data_path = os.path.join(data_root, pid, f"{modality}_image_data.npy")
            if not os.path.exists(data_path):
                continue

            # NOTE(review): allow_pickle=True can execute arbitrary code when
            # loading untrusted files; plain image arrays may not need it.
            ct_data = np.load(data_path, allow_pickle=True)
            spacing_x = filtered_df[f'{modality}_Spacing_X'].values[0]
            spacing_y = filtered_df[f'{modality}_Spacing_Y'].values[0]
            spacing_z = filtered_df[f'{modality}_Spacing_Z'].values[0]

            try:
                kidney_crop = crop_ct_3D(ct_data=ct_data,
                                         kidney_center=kidney_center,
                                         spacing=(spacing_x, spacing_y, spacing_z),
                                         crop_size_mm=CROP_SIZE_MM)
            except Exception as exc:
                # Best effort: keep going, but do not hide why it failed and
                # do not swallow KeyboardInterrupt / SystemExit.
                print(f"Error in {pid} in {dataset}. Modality: {modality}, spacing: ({spacing_x}, {spacing_y}, {spacing_z}). Reason: {exc!r}")
                continue

            save_dir = os.path.join(save_root, pid)
            os.makedirs(save_dir, exist_ok=True)

            l_kidney_crop, r_kidney_crop = kidney_crop["l_kidney_crop"], kidney_crop["r_kidney_crop"]
            np.save(os.path.join(save_dir, f"{modality}_L_image_data.npy"), l_kidney_crop)
            np.save(os.path.join(save_dir, f"{modality}_R_image_data.npy"), r_kidney_crop)

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

def get_kidney_center(pid, tc_root):
    """Load the stored coordinates of one patient and return both kidney centers.

    Reads ``<tc_root>/<pid>/TC_coordinates_V2.npy``, which must contain a
    pickled mapping with the keys ``'MNK_right'`` and ``'MNK_left'``.

    Args:
        pid: Patient identifier, used as the sub-directory name.
        tc_root: Directory that holds one sub-directory per patient.

    Returns:
        dict: ``{'r_kidney_center': <MNK_right>, 'l_kidney_center': <MNK_left>}``.
    """
    coordinates = np.load(
        os.path.join(tc_root, pid, "TC_coordinates_V2.npy"),
        allow_pickle=True,
    ).item()

    # Output key -> key in the stored mapping (right side is read first).
    source_keys = {
        'r_kidney_center': 'MNK_right',
        'l_kidney_center': 'MNK_left',
    }
    return {name: coordinates[source] for name, source in source_keys.items()}


# Crop the tumor-side kidney for every patient of each external validation
# dataset, for each modality that has image data on disk.
datasets = ["XM_ALL", "LY_ALL", "ZY_ALL", "RJ_ALL", "SD_ALL", "TCIA_ALL"]

MODALITIES = ["N", "A", "V", "D"]

# Full crop window per axis, passed to crop_ct_3D as crop_size_mm.
CROP_SIZE_MM = (140, 140, 160)

for dataset in datasets:
    print(f"Processing dataset: {dataset}")

    # Modify paths for current dataset
    save_root = f'/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/ExternalValid/{dataset}/crop_img-TC_v3'
    data_root = f'/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/ExternalValid/{dataset}/nii_npy_aligned'
    tc_root = f'/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/ExternalValid/{dataset}/RenalCLIP-TC-Aligned'
    df = pd.read_excel(f'/cpfs01/projects-HDD/cfff-bb5d866c17c2_HDD/public/ExternalValid/{dataset}/ct_statistics.xlsx', dtype={"PatientID": str})
    # The sheet name is the dataset name without its "_ALL" suffix.
    tumor_side_df = pd.read_excel('/cpfs01/projects-SSD/cfff-bb5d866c17c2_SSD/public/RenalCLIP/text_files/RenalCLIP_4.2.xlsx', sheet_name=dataset.split("_")[0], dtype={"医技号": str})

    # Map from patient ID to tumor side
    tumor_side_map = dict(zip(tumor_side_df["医技号"], tumor_side_df["tumor_side"]))

    pid_list = tumor_side_df["医技号"]

    for pid in tqdm(pid_list):
        pid = str(pid)

        tumor_side = tumor_side_map.get(pid)
        # A blank cell is read as NaN, which is truthy and has no .lower();
        # treat anything that is not a non-empty string as missing.
        if not isinstance(tumor_side, str) or not tumor_side.strip():
            print(f"PID {pid} not found in tumor_side mapping.")
            continue
        tumor_side = tumor_side.strip()

        # crop_ct_3D only returns these two keys; reject any other side value
        # before doing expensive I/O.
        crop_key = f"{tumor_side.lower()}_kidney_crop"
        if crop_key not in ("r_kidney_crop", "l_kidney_crop"):
            print(f"PID {pid} has unsupported tumor_side {tumor_side!r}; expected 'L' or 'R'.")
            continue

        filtered_df = df[df['PatientID'].astype(str) == pid]
        if filtered_df.empty:
            # Without a statistics row there is no spacing to crop with.
            print(f"Skipping {pid} in {dataset}, no row in CT statistics.")
            continue

        # One missing or incomplete coordinate file must not abort the batch.
        try:
            kidney_center = get_kidney_center(pid, tc_root)
        except (FileNotFoundError, KeyError) as exc:
            print(f"Skipping {pid} in {dataset}, TC coordinates unavailable: {exc!r}")
            continue

        for modality in MODALITIES:
            data_path = os.path.join(data_root, pid, f"{modality}_image_data.npy")

            if not os.path.exists(data_path):
                print(f"Data path: {data_path} not existed")
                continue

            # NOTE(review): allow_pickle=True can execute arbitrary code when
            # loading untrusted files; plain image arrays may not need it.
            ct_data = np.load(data_path, allow_pickle=True)

            # Extract spacing values
            spacing_x = filtered_df[f'{modality}_Spacing_X'].values[0]
            spacing_y = filtered_df[f'{modality}_Spacing_Y'].values[0]
            spacing_z = filtered_df[f'{modality}_Spacing_Z'].values[0]

            # Perform cropping
            kidney_crop = crop_ct_3D(
                ct_data=ct_data,
                kidney_center=kidney_center,
                spacing=(spacing_x, spacing_y, spacing_z),
                crop_size_mm=CROP_SIZE_MM
            )

            # Select crop result based on tumor side
            crop_result = kidney_crop[crop_key]

            # Save cropped result
            save_dir = os.path.join(save_root, pid)
            os.makedirs(save_dir, exist_ok=True)
            np.save(os.path.join(save_dir, f"{modality}_{tumor_side}_image_data.npy"), crop_result)