In [1]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
import pydicom
from PIL import Image

In [8]:
def load_dicom_image(path, img_size=256):
    # Load dicom and convert to 256x256 RGB image
    dicom = pydicom.dcmread(path)
    image = dicom.pixel_array
    image = Image.fromarray(image).convert('RGB')
    image = image.resize((img_size, img_size))
    image = np.array(image) / 255.0

    # Convert to TensorFlow tensor
    return tf.convert_to_tensor(image, dtype=tf.float32)

In [3]:
def process_directory(directory_path, label, img_size=256):
    images, labels = [], []
    for root, dirs, files in os.walk(directory_path):
        for filename in files:
            # Check if the file is a DICOM file
            if filename.endswith('.dcm'):
                # Construct the full path to the DICOM file
                file_path = os.path.join(root, filename)
                # Load and process the image
                image = load_dicom_image(file_path, img_size=img_size)
                # Append the processed image and its label to the lists
                images.append(image)
                labels.append(label)
    return images, labels

In [4]:
def create_datasets(cancer_dir, non_cancer_dir, img_size=256):
    # Load and process images and labels
    cancer_images, cancer_labels = process_directory(cancer_dir, 1, img_size=img_size)
    non_cancer_images, non_cancer_labels = process_directory(non_cancer_dir, 0, img_size=img_size)
    
    # Combine images and labels
    all_images = np.concatenate([np.array(cancer_images), np.array(non_cancer_images)], axis=0)
    all_labels = np.array(cancer_labels + non_cancer_labels)
    
    return all_images, all_labels

In [5]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2

def build_model_with_resnet(input_shape=(256, 256, 3), dropout_rate=0.5, l2_reg=0.001):
    base_model = ResNet50(input_shape=input_shape,
                          include_top=False,
                          weights='imagenet')
    base_model.trainable = False  

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(l2_reg))
    ])
    
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model


In [9]:
# Load all data
all_images, all_labels = create_datasets(
    cancer_dir='/Users/arjunmoorthy/Desktop/Research_Capstone/ImageData/CapstoneData/cancer',
    non_cancer_dir='/Users/arjunmoorthy/Desktop/Research_Capstone/ImageData/CapstoneData/non_cancer',
    img_size=256)

# Split data into training+validation and test sets
(train_val_images, test_images, train_val_labels, test_labels) = train_test_split(
    all_images, all_labels, test_size=0.2, random_state=42)

# Prepare for 5-Fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [10]:
# Cross-validation loop
for fold, (train_idx, val_idx) in enumerate(kf.split(train_val_images)):
    print(f"Training on fold {fold+1}/5...")
    # Generate datasets for the current fold
    train_dataset = tf.data.Dataset.from_tensor_slices((train_val_images[train_idx], train_val_labels[train_idx])).batch(32).shuffle(len(train_idx))
    val_dataset = tf.data.Dataset.from_tensor_slices((train_val_images[val_idx], train_val_labels[val_idx])).batch(32)
    
    # Build and compile model
    model = build_model_with_resnet()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', Precision(name='precision'), Recall(name='recall'), AUC(name='auc')])
    
    # Fit model
    history = model.fit(train_dataset, epochs=50, validation_data=val_dataset)

Training on fold 1/5...


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [None]:
# After cross-validation, you might want to retrain your model on all training data and evaluate it on the test set
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(32)
# Make sure to re-build and compile your model