In [1]:
import pydicom
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import os
from tensorflow.keras.metrics import Precision, Recall, AUC
from sklearn.model_selection import train_test_split

In [2]:
def load_dicom_image(path, img_size=256):
    # Load dicom and convert to 256x256 RGB image
    dicom = pydicom.dcmread(path)
    image = dicom.pixel_array
    image = Image.fromarray(image).convert('RGB')
    image = image.resize((img_size, img_size))
    return np.array(image) / 255.0

    # Convert to TensorFlow tensor
    return tf.convert_to_tensor(image, dtype=tf.float32)

In [3]:
def process_directory(directory_path, label, img_size=256):
    images, labels = [], []
    for filename in os.listdir(directory_path):
        if filename.endswith('.dcm'):
            image_path = os.path.join(directory_path, filename)
            image = load_dicom_image(image_path, img_size=img_size)
            images.append(image)
            labels.append(label)
    return images, labels

In [10]:
# Adjusted handling for tensors when creating datasets
def create_datasets(cancer_dir, non_cancer_dir, img_size=256, batch_size=32, val_split=0.2):
    cancer_images, cancer_labels = process_directory(cancer_dir, 1, img_size=img_size)
    non_cancer_images, non_cancer_labels = process_directory(non_cancer_dir, 0, img_size=img_size)
    
    # Combine images and labels
    all_images = cancer_images + non_cancer_images
    all_labels = cancer_labels + non_cancer_labels
    
    # Convert labels to tensors
    all_labels = tf.convert_to_tensor(all_labels, dtype=tf.float32)
    
    # Create a dataset directly from tensors
    dataset = tf.data.Dataset.from_tensor_slices((all_images, all_labels))
    dataset = dataset.shuffle(buffer_size=len(all_images))
    
    # Split into training and validation datasets
    val_size = int(len(all_images) * val_split)
    val_dataset = dataset.take(val_size)
    train_dataset = dataset.skip(val_size)
    
    # Batch the datasets
    train_dataset = train_dataset.batch(batch_size)
    val_dataset = val_dataset.batch(batch_size)
    
    return train_dataset, val_dataset

In [5]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2

def build_model_with_resnet(input_shape=(256, 256, 3), dropout_rate=0.5, l2_reg=0.001):
    base_model = ResNet50(input_shape=input_shape,
                          include_top=False,
                          weights='imagenet')
    base_model.trainable = False  

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(l2_reg))
    ])
    
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model


In [13]:
# train_dataset = create_dataset('/Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/CapstoneData', img_size=256, batch_size=32)

train_dataset, val_dataset = create_datasets(
    cancer_dir='/Users/arjunmoorthy/Desktop/Research_Capstone/ImageData/CapstoneData/cancer',
    non_cancer_dir='/Users/arjunmoorthy/Desktop/Research_Capstone/ImageData/CapstoneData/non_cancer',
    img_size=256,  
    batch_size=32,  
    val_split=0.2   
)


2024-03-30 16:52:16.253146: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: buffer_size must be greater than zero or UNKNOWN_CARDINALITY


InvalidArgumentError: {{function_node __wrapped__ShuffleDatasetV3_device_/job:localhost/replica:0/task:0/device:CPU:0}} buffer_size must be greater than zero or UNKNOWN_CARDINALITY [Op:ShuffleDatasetV3] name: 

In [None]:
# Test the load_dicom_image function with a single DICOM file
test_image_path = '/Users/arjunmoorthy/Desktop/13.000000-t2spcrstaxial oblProstate-06314/1-48.dcm'
test_image = load_dicom_image(test_image_path, img_size=256)
print(test_image.shape)  # Should print: (256, 256, 3)


In [None]:
model = build_model_with_resnet()

model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Or another appropriate loss function
              metrics=['accuracy', Precision(name='precision'), Recall(name='recall'), AUC(name='auc')])

In [None]:
history = model.fit(train_dataset,
                    epochs=50,
                    validation_data=val_dataset) 