In [2]:
import os
import sys
import shutil
import json
import pandas as pd
import numpy as np

# set environment variable here.
os.environ["nnUNet_preprocessed"] = "/home/bhatti_uhn/nnUNet_preprocessed"
os.environ["nnUNet_results"] = "/home/bhatti_uhn/nnUNet_results"
os.environ["nnUNet_raw"] = "/home/bhatti_uhn/nnUNet_raw"

# Ensure that environment variables are set correctly # from run_training.py
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

from nnunetv2.dataset_conversion import generate_dataset_json
from nnunetv2.run.run_training import run_training_entry
import logging

from sklearn.metrics import classification_report, f1_score, confusion_matrix

logging.basicConfig(level=logging.WARNING)

In [3]:
# Define paths
base_dir = '/home/bhatti_uhn/Dataset/UHN-MedImg3D-ML-quiz'
nnunet_base = '/home/bhatti_uhn/nnUNet_raw/Dataset877_UHNMedImg3DVAL'

images_tr_dir = os.path.join(nnunet_base, 'imagesTr')
labels_tr_dir = os.path.join(nnunet_base, 'labelsTr')
images_ts_dir = os.path.join(nnunet_base, 'imagesTs')

# Create directories
os.makedirs(images_tr_dir, exist_ok=True)
os.makedirs(labels_tr_dir, exist_ok=True)
os.makedirs(images_ts_dir, exist_ok=True)
    

trainSrc = os.path.join(base_dir, 'validation')
testSrc = os.path.join(base_dir, 'validation')

# list subdirectories of trainSrc # remove .DS_Store
train_subdirs = [sdir for sdir in os.listdir(trainSrc) if not sdir.startswith('.')]
train_subdirs = sorted(train_subdirs)
train_subdirs

def copy_files_to_raw(images_tr_dir, labels_tr_dir, trainSrc, train_subdirs):

    for i, subdir in enumerate(train_subdirs):
        print(f"Processing {subdir}...")

    # create path for reading subdirectories
        train_subdir = os.path.join(trainSrc, subdir)

    # list files in subdirectory
        files = [s for s in sorted(os.listdir(train_subdir)) if not s.startswith('.')]

    # separate images and labels files: image: 'quiz_2_002_0000.nii.gz', and label: 'quiz_2_002.nii.gz'
        for j, fileName in enumerate(files):
            if len(fileName.split('_')) == 4: # assuming quiz_subType_patID is a valid CASE_ID. and 0000 is the modality.
                fileNameSplit = fileName.split('_')
                logging.info(f"Processing {fileNameSplit}")

                newFileName = fileNameSplit[0] + '_' + fileNameSplit[1] + '_' + fileNameSplit[2] + '_' + fileNameSplit[3]
                tr_copyPath = os.path.join(train_subdir, fileName)

                logging.info(f"Copying {tr_copyPath} to {images_tr_dir}")
                shutil.copy(tr_copyPath, images_tr_dir)
                # rename the file
                # os.rename(os.path.join(images_tr_dir, fileName), os.path.join(images_tr_dir, newFileName))

            elif len(fileName.split('_')) == 3:
                fileNameSplit = fileName.split('_')
                newFileName = fileNameSplit[0] + '_' + fileNameSplit[1] + '_' + fileNameSplit[2]

                shutil.copy(os.path.join(train_subdir, fileName), labels_tr_dir)
                # rename the file
                # os.rename(os.path.join(labels_tr_dir, fileName), os.path.join(labels_tr_dir, newFileName))

            else: 
                raise ValueError(f"File {fileName} does not match the expected format.")


def copy_testFiles_to_raw(images_ts_dir, testSrc):

    files = [s for s in sorted(os.listdir(testSrc)) if not s.startswith('.')]

    for j, fileName in enumerate(files):
        shutil.copy(os.path.join(testSrc, fileName), images_ts_dir)

## uncomment to run ###
copy_files_to_raw(images_tr_dir, labels_tr_dir, trainSrc, train_subdirs)
# copy_testFiles_to_raw(images_ts_dir, testSrc)

Processing subtype0...
Processing subtype1...
Processing subtype2...


In [4]:
# Interesting. label integrity check fails. Expected: [np.int64(0), np.int64(1), np.int64(2)] Found: [0.        1.0000153 2.       ]
# fixing the label files

import SimpleITK as sitk
import numpy as np

# Function to correct labels
def correct_labels(image):
    array = sitk.GetArrayFromImage(image)
    array = np.where(np.isclose(array, 1.0000153), 1, array)  # Correcting the label value
    array = np.int64(array)
    corrected_image = sitk.GetImageFromArray(array)
    corrected_image.CopyInformation(image)
    return corrected_image

# Load the problematic image
def correct_all_type_labels(labels_tr_dir):
    labelImagesInDir = [sdir for sdir in sorted(os.listdir(labels_tr_dir)) if not sdir.startswith('.')]

    for labelImgIdx, labelImg in enumerate(labelImagesInDir):
        imagePath = os.path.join(labels_tr_dir, labelImg)
        image = sitk.ReadImage(imagePath)

    # Correct the labels
        corrected_image = correct_labels(image)
        sitk.WriteImage(corrected_image, imagePath)


## uncomment to run ###
correct_all_type_labels(labels_tr_dir)

# create dataset.json
channel_names = {0: "CT"}

labels = {
    'background': 0,
    'pancreas': 1,
    'lesion': 2
}

num_training_cases = len(os.listdir(images_tr_dir))
file_ending = '.nii.gz'

## uncomment to run ###
generate_dataset_json.generate_dataset_json(nnunet_base, channel_names, labels, num_training_cases, file_ending)

In [5]:
def correct_spacing(image_path, seg_path):
    # Load the image and segmentation
    image = sitk.ReadImage(image_path)
    seg = sitk.ReadImage(seg_path)
    
    # Get the spacing from the image
    image_spacing = image.GetSpacing()
    seg_spacing = seg.GetSpacing()
    
    # Compare and correct spacing if needed
    if not np.allclose(image_spacing, seg_spacing, atol=1e-7):
        print(f"Correcting spacing for {seg_path}")
        seg.SetSpacing(image_spacing)
        
        # Save the corrected segmentation
        sitk.WriteImage(seg, seg_path)
        print(f"Corrected segmentation saved to {seg_path}")

# Correct spacing for the segmentation files
# list all files under imagesTr

 ## uncomment to run ###
imageFilesInDir = [sdir for sdir in sorted(os.listdir(images_tr_dir)) if not sdir.startswith('.')]

for imageIdx, image in enumerate(imageFilesInDir):
    imgPath = os.path.join(images_tr_dir, image)
    segPath = os.path.join(labels_tr_dir, image.replace('_0000', ''))

    correct_spacing(imgPath, segPath)

Correcting spacing for /home/bhatti_uhn/nnUNet_raw/Dataset877_UHNMedImg3DVAL/labelsTr/quiz_0_168.nii.gz
Corrected segmentation saved to /home/bhatti_uhn/nnUNet_raw/Dataset877_UHNMedImg3DVAL/labelsTr/quiz_0_168.nii.gz


In [11]:
# read the "/home/bhatti_uhn/nnUNet_results/Dataset876_UHNMedImg3D/nnUnetSegClsTrainer__nnUNetPlans__3d_fullres/fold_all/validation" directory and classification.npy file.
# store in a df and save to csv
# df template: Name | Label | SubType (Prediction)

# Define the path to the classification file
valSrcPath = "/home/bhatti_uhn/nnUNet_raw/Dataset877_UHNMedImg3DVAL/imagesTr_3d_fullres"

# list classification.npy files
classification_files = [sdir for sdir in sorted(os.listdir(valSrcPath)) if sdir.split('_')[-1] == 'classification.npy']

valResults = {}
df = pd.DataFrame(columns=['Name', 'Label', 'SubType'])

for classFile in classification_files:
    classFilePath = os.path.join(valSrcPath, classFile)
    # print(f"Processing {classFilePath}")

    # target class
    target_class = int(classFile.split('_')[1])

    # Load the classification file
    classification = np.load(classFilePath)
    # keep argmax only
    class_pred = np.argmax(classification, axis=-1)

    # Create a DataFrame
    new_row = pd.DataFrame({'Name': [classFile.replace('_classification.npy', '.nii.gz')], 'Label': [int(target_class)], 'SubType': class_pred})
    df = pd.concat([df, new_row], ignore_index=True)

    # # Save the DataFrame to a CSV file
    # df.to_csv(classFilePath.replace('.npy', '.csv'), index=False)

    # print(f"Saved classification to {classFilePath.replace('.npy', '.csv')}")

In [20]:
# Ensure labels are of integer type
df['Label'] = df['Label'].astype(int)
df['SubType'] = df['SubType'].astype(int)

# Convert columns to numpy arrays
preds = df['SubType'].values
targets = df['Label'].values

len(preds), len(targets)

# Calculate accuracy
accuracy = np.mean(preds == targets)
print(f"Accuracy: {accuracy}")

print(classification_report(preds, targets))

Accuracy: 0.5833333333333334
              precision    recall  f1-score   support

           0       0.44      1.00      0.62         4
           1       0.67      0.59      0.62        17
           2       0.58      0.47      0.52        15

    accuracy                           0.58        36
   macro avg       0.56      0.68      0.59        36
weighted avg       0.61      0.58      0.58        36



In [10]:
df

Unnamed: 0,Name,Label,SubType,Correct
0,quiz_0_168.nii.gz,0,1,False
1,quiz_0_171.nii.gz,0,0,True
2,quiz_0_174.nii.gz,0,2,False
3,quiz_0_184.nii.gz,0,2,False
4,quiz_0_187.nii.gz,0,0,True
5,quiz_0_189.nii.gz,0,1,False
6,quiz_0_244.nii.gz,0,2,False
7,quiz_0_253.nii.gz,0,0,True
8,quiz_0_254.nii.gz,0,0,True
9,quiz_1_090.nii.gz,1,1,True


In [11]:
class_pred

array([2])

In [12]:
target_class

0

In [7]:
# from nnunetv2.paths import nnUNet_results, nnUNet_raw
# import torch
# from batchgenerators.utilities.file_and_folder_operations import join
# from nnunetv2.inference.predict_from_raw_data_cls import nnUNetSegClsPredictor
# from nnunetv2.imageio.simpleitk_reader_writer import SimpleITKIO

# # nnUNetv2_predict -d 3 -f 0 -c 3d_lowres -i imagesTs -o imagesTs_predlowres --continue_prediction
# # /home/bhatti_uhn/nnUNet_results/Dataset876_UHNMedImg3D/nnUnetSegClsTrainer__nnUNetPlans__3d_fullres
# # /home/bhatti_uhn/nnUNet_raw/Dataset876_UHNMedImg3D/imagesTs
# # instantiate the nnUNetPredictor
# predictor = nnUNetSegClsPredictor(
#     tile_step_size=0.5,
#     use_gaussian=True,
#     use_mirroring=True,
#     perform_everything_on_device=True,
#     device=torch.device("cuda", 0),
#     verbose=False,
#     verbose_preprocessing=False,
#     allow_tqdm=True,
# )
# # initializes the network architecture, loads the checkpoint
# predictor.initialize_from_trained_model_folder(
#     join(
#         nnUNet_results,
#         "Dataset876_UHNMedImg3D/nnUnetSegClsTrainer__nnUNetPlans__3d_fullres",
#     ),
#     use_folds="all",
#     checkpoint_name="checkpoint_final.pth",
# )
# # variant 1: give input and output folders
# predictor.predict_from_files(
#     join(nnUNet_raw, "Dataset877_UHNMedImg3DVAL/imagesTr"),
#     join(nnUNet_raw, "Dataset877_UHNMedImg3DVAL/imagesTr_3d_fullres"),
#     save_probabilities=False,
#     overwrite=False,
#     num_processes_preprocessing=2,
#     num_processes_segmentation_export=2,
#     folder_with_segs_from_prev_stage=None,
#     num_parts=1,
#     part_id=0,
# )

  checkpoint = torch.load(join(model_training_output_dir, f'fold_{f}', checkpoint_name),


{'n_stages': 6, 'features_per_stage': [32, 64, 128, 256, 320, 320], 'conv_op': 'torch.nn.modules.conv.Conv3d', 'kernel_sizes': [[1, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3]], 'strides': [[1, 1, 1], [1, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]], 'n_conv_per_stage': [2, 2, 2, 2, 2, 2], 'n_conv_per_stage_decoder': [2, 2, 2, 2, 2], 'conv_bias': True, 'norm_op': 'torch.nn.modules.instancenorm.InstanceNorm3d', 'norm_op_kwargs': {'eps': 1e-05, 'affine': True}, 'dropout_op': None, 'dropout_op_kwargs': None, 'nonlin': 'torch.nn.LeakyReLU', 'nonlin_kwargs': {'inplace': True}}
There are 36 cases in the source folder
I am process 0 out of 1 (max process ID is 0, we start counting with 0!)
There are 36 cases that I would like to predict
overwrite was set to False, so I am only working on cases that haven't been predicted yet. That's 36 cases.

Predicting quiz_0_168:
perform_everything_on_device: True


100%|██████████| 4/4 [00:01<00:00,  3.88it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_168

Predicting quiz_0_171:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.97it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_171

Predicting quiz_0_174:
perform_everything_on_device: True


100%|██████████| 3/3 [00:00<00:00,  9.04it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_174

Predicting quiz_0_184:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.96it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_184

Predicting quiz_0_187:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.73it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_187

Predicting quiz_0_189:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 13.60it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_189

Predicting quiz_0_244:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  8.77it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_244

Predicting quiz_0_253:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.97it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_253

Predicting quiz_0_254:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.57it/s]


sending off prediction to background worker for resampling and export
done with quiz_0_254

Predicting quiz_1_090:
perform_everything_on_device: True


100%|██████████| 4/4 [00:00<00:00,  8.63it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_090

Predicting quiz_1_093:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  8.03it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_093

Predicting quiz_1_094:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 11.75it/s]

sending off prediction to background worker for resampling and export
done with quiz_1_094






Predicting quiz_1_154:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.97it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_154

Predicting quiz_1_158:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.96it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_158

Predicting quiz_1_164:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.38it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_164

Predicting quiz_1_166:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 13.25it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_166

Predicting quiz_1_211:
perform_everything_on_device: True


100%|██████████| 4/4 [00:00<00:00,  8.63it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_211

Predicting quiz_1_213:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 14.62it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_213

Predicting quiz_1_221:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 14.62it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_221

Predicting quiz_1_227:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00,  9.39it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_227

Predicting quiz_1_231:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.37it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_231

Predicting quiz_1_242:
perform_everything_on_device: True


100%|██████████| 4/4 [00:00<00:00,  8.54it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_242

Predicting quiz_1_331:
perform_everything_on_device: True


100%|██████████| 8/8 [00:00<00:00,  8.09it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_331

Predicting quiz_1_335:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 14.59it/s]


sending off prediction to background worker for resampling and export
done with quiz_1_335

Predicting quiz_2_074:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.96it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_074

Predicting quiz_2_080:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.54it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_080

Predicting quiz_2_084:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.95it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_084

Predicting quiz_2_085:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.96it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_085

Predicting quiz_2_088:
perform_everything_on_device: True


100%|██████████| 8/8 [00:00<00:00,  8.10it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_088

Predicting quiz_2_089:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 14.63it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_089

Predicting quiz_2_098:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.58it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_098

Predicting quiz_2_191:
perform_everything_on_device: True


100%|██████████| 8/8 [00:00<00:00,  8.09it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_191

Predicting quiz_2_241:
perform_everything_on_device: True


100%|██████████| 4/4 [00:00<00:00,  8.63it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_241

Predicting quiz_2_364:
perform_everything_on_device: True


100%|██████████| 1/1 [00:00<00:00, 14.63it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_364

Predicting quiz_2_377:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.95it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_377

Predicting quiz_2_379:
perform_everything_on_device: True


100%|██████████| 2/2 [00:00<00:00,  9.96it/s]


sending off prediction to background worker for resampling and export
done with quiz_2_379


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [None]:
if __name__ == '__main__':
    from nnunetv2.paths import nnUNet_results, nnUNet_raw
    import torch
    from batchgenerators.utilities.file_and_folder_operations import join
    from nnunetv2.inference.predict_from_raw_data import nnUNetPredictor
    from nnunetv2.imageio.simpleitk_reader_writer import SimpleITKIO

    # nnUNetv2_predict -d 3 -f 0 -c 3d_lowres -i imagesTs -o imagesTs_predlowres --continue_prediction

    # instantiate the nnUNetPredictor
    predictor = nnUNetPredictor(
        tile_step_size=0.5,
        use_gaussian=True,
        use_mirroring=True,
        perform_everything_on_device=True,
        device=torch.device('cuda', 0),
        verbose=False,
        verbose_preprocessing=False,
        allow_tqdm=True
    )
    # initializes the network architecture, loads the checkpoint
    predictor.initialize_from_trained_model_folder(
        join(nnUNet_results, 'Dataset003_Liver/nnUNetTrainer__nnUNetPlans__3d_lowres'),
        use_folds=(0,),
        checkpoint_name='checkpoint_final.pth',
    )
    # variant 1: give input and output folders
    predictor.predict_from_files(join(nnUNet_raw, 'Dataset003_Liver/imagesTs'),
                                 join(nnUNet_raw, 'Dataset003_Liver/imagesTs_predlowres'),
                                 save_probabilities=False, overwrite=False,
                                 num_processes_preprocessing=2, num_processes_segmentation_export=2,
                                 folder_with_segs_from_prev_stage=None, num_parts=1, part_id=0)