In [51]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import resource
import tensorflow_datasets as tfds
import random

In [52]:
# load the santa maria dataset, specifically the torax3d_1 partition. If you want to get another partition
# replace with 'pet_01' or 'body_01'
sample_dataset, info = tfds.load('santa_maria_dataset/torax3d_1', with_info=True)

In [60]:
def get_training_testing_sm_datasets(split_val=0.8, random_seed):
    '''Creates random training and testing balanced partitions given the split_val. 
    Returns two datasets, one training and other for testing.'''
    
    # Define the positive and negative patients
    pos_patients = [f'sm_{str(i).zfill(3)}' for i in range(1, 13)]
    neg_patients = [f'sm_{str(i).zfill(3)}' for i in range(13, 36)]
    
    # Get the split keys (splits) of the dataset
    split_keys = list(info.splits.keys())
    
    # Find positive and negative patients that are also in the split keys
    pos_patients = [patient for patient in pos_patients if patient in split_keys]
    neg_patients = [patient for patient in neg_patients if patient in split_keys]
    
    # Shuffle the order of positive and negative patients for randomness
    random.shuffle(pos_patients, random_seed)
    random.shuffle(neg_patients, random_seed)
    
    # Calculate the number of patients for training and testing
    train_pos_count = int(split_val * len(pos_patients))
    train_neg_count = int(split_val * len(neg_patients))
    
    # Create the training and testing sets
    training_patients = pos_patients[:train_pos_count] + neg_patients[:train_neg_count]
    testing_patients = pos_patients[train_pos_count:] + neg_patients[train_neg_count:]
    
    # Create dictionaries to hold the training and testing data
    training_data = {patient: sample_dataset[patient] for patient in training_patients}
    testing_data = {patient: sample_dataset[patient] for patient in testing_patients}
    
    
    # Create a generator for the training dataset
    def generate_training_data():
        for patient_id in training_patients:
            patient_data = training_data[patient_id]
            for data in patient_data:
                yield data['patient_id'], data['img_exam'], data['mask_exam'], data['label']
    
    # Create a TensorFlow Dataset from the generator
    training_dataset = tf.data.Dataset.from_generator(
        generate_training_data,
        output_signature=(
            tf.TensorSpec(shape=(), dtype=tf.string),  # For patient_id
            tf.TensorSpec(shape=(None, None, None), dtype=tf.uint16),  # For img_exam
            tf.TensorSpec(shape=(None, None, None), dtype=tf.uint16),  # For mask_exam
            tf.TensorSpec(shape=(), dtype=tf.int64)  # For label
        )
    )
    
    # Create a generator for the training dataset
    def generate_testing_data():
        for patient_id in testing_patients:
            patient_data = testing_data[patient_id]
            print(patient_data.keys())
            for data in patient_data:
                yield data['patient_id'], data['img_exam'], data['mask_exam'], data['label']
    
    # Create a TensorFlow Dataset from the generator
    testing_dataset = tf.data.Dataset.from_generator(
        generate_testing_data,
        output_signature=(
            tf.TensorSpec(shape=(), dtype=tf.string),  # For patient_id
            tf.TensorSpec(shape=(None, None, None), dtype=tf.uint16),  # For img_exam
            tf.TensorSpec(shape=(None, None, None), dtype=tf.uint16),  # For mask_exam
            tf.TensorSpec(shape=(), dtype=tf.int64)  # For label
        )
    )

    return training_dataset, testing_dataset

In [61]:
training_ds, testing_ds = get_training_testing_sm_datasets()

training_stop = 0
testing_stop = 0
# Iterate through the training dataset
for patient_id, img_exam, mask_exam, label in training_ds:
    training_stop +=1
    print("Patient ID:", patient_id.numpy().decode('utf-8'))
    print("Image Exam Shape:", img_exam.shape)
    print("Mask Exam Shape:", mask_exam.shape)
    print("Label:", label.numpy())

    if training_stop == 10:
        break

# Iterate through the testing dataset
for patient_id, img_exam, mask_exam, label in testing_ds:
    print("Patient ID:", patient_id.numpy().decode('utf-8'))
    print("Image Exam Shape:", img_exam.shape)
    print("Mask Exam Shape:", mask_exam.shape)
    print("Label:", label.numpy())
    testing_stop += 1
    if testing_stop == 10: break

2023-10-17 12:55:30.449864: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1
Patient ID: sm_010
Image Exam Shape: (512, 512, 1)
Mask Exam Shape: (512, 512, 1)
Label: 1


2023-10-17 12:55:30.761068: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-10-17 12:55:30.833064: W tensorflow/core/framework/op_kernel.cc:1818] UNKNOWN: AttributeError: '_PrefetchDataset' object has no attribute 'keys'
Traceback (most recent call last):

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 267, in __call__
    ret = func(*args)

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in gener

UnknownError: {{function_node __wrapped__IteratorGetNext_output_types_4_device_/job:localhost/replica:0/task:0/device:CPU:0}} AttributeError: '_PrefetchDataset' object has no attribute 'keys'
Traceback (most recent call last):

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 267, in __call__
    ret = func(*args)

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/home/kali/miniconda3/envs/lung_radiomics_2/lib/python3.8/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/tmp/ipykernel_448783/739863865.py", line 55, in generate_testing_data
    print(patient_data.keys())

AttributeError: '_PrefetchDataset' object has no attribute 'keys'


	 [[{{node PyFunc}}]] [Op:IteratorGetNext]