In [7]:
import pydicom
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import os


In [2]:
def load_dicom_image(path, img_size=256):
    # Load dicom and convert to 256x256 RGB image
    dicom = pydicom.dcmread(path)
    image = dicom.pixel_array
    image = Image.fromarray(image).convert('RGB')
    image = image.resize((img_size, img_size))
    return np.array(image) / 255.0

    # Convert to TensorFlow tensor
    return tf.convert_to_tensor(image, dtype=tf.float32)


In [3]:
def process_patient_images(patient_folder, img_size=256):
    """
    Process all DICOM images of a patient and return a single representative array.
    This is a placeholder function; you need to decide how to handle multiple images.
    """
    images = []
    for filename in os.listdir(patient_folder):
        if filename.endswith('.dcm'):
            path = os.path.join(patient_folder, filename)
            image = load_dicom_image(path, img_size=img_size)
            images.append(image)
    
    # Placeholder: simply return the first image for now
    if images:
        return images[0]
    else:
        return None

In [4]:
def create_dataset(root_dir, img_size=256, batch_size=32):
    # Placeholder lists for images and labels
    all_images = []
    all_labels = []
    
    # Navigate through the root directory and process each patient's images
    for condition in os.listdir(root_dir):
        condition_path = os.path.join(root_dir, condition)
        if not os.path.isdir(condition_path):
            continue
        
        label = 1 if condition.lower() == 'cancer' else 0
        
        for patient_folder_name in os.listdir(condition_path):
            patient_folder_path = os.path.join(condition_path, patient_folder_name)
            if not os.path.isdir(patient_folder_path):
                continue
            
            representative_image = process_patient_images(patient_folder_path, img_size=img_size)
            if representative_image is not None:
                all_images.append(representative_image)
                all_labels.append(label)
    
    # Convert lists to tensors
    dataset_images = tf.stack(all_images)
    dataset_labels = tf.convert_to_tensor(all_labels, dtype=tf.float32)
    
    # Create a tf.data.Dataset
    dataset = tf.data.Dataset.from_tensor_slices((dataset_images, dataset_labels))
    dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)
    
    return dataset

In [5]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential

def build_model(input_shape=(256, 256, 3)):
    base_model = MobileNetV2(input_shape=input_shape,
                             include_top=False,
                             weights='imagenet')
    base_model.trainable = False
    
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model


In [9]:
train_dataset = create_dataset('/Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/CapstoneData', img_size=256, batch_size=32)

In [10]:
model = build_model()

history = model.fit(train_dataset,
                    epochs=50)  # Adjust the number of epochs as necessary

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


  base_model = MobileNetV2(input_shape=input_shape,


[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 135ms/step - accuracy: 0.5310 - loss: 0.7394
Epoch 2/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 133ms/step - accuracy: 0.5315 - loss: 0.7371
Epoch 3/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 135ms/step - accuracy: 0.5763 - loss: 0.6789
Epoch 4/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 135ms/step - accuracy: 0.5745 - loss: 0.6757
Epoch 5/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 135ms/step - accuracy: 0.5836 - loss: 0.6698
Epoch 6/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 134ms/step - accuracy: 0.5844 - loss: 0.6920
Epoch 7/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 136ms/step - accuracy: 0.6053 - loss: 0.6729
Epoch 8/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13

In [3]:
dicom_dir_path = '/Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/CapstoneData'  # Directory where DICOM files are stored

tensor([1.], device='mps:0')
