In [1]:
import cv2
import pydicom
import pandas as pd 
import numpy as np
from pathlib import Path
import tensorflow as tf
from pathlib import Path
from skimage import exposure
from sklearn.model_selection import train_test_split

from keras.applications import ResNet50
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Dropout, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator # use this to generate more data

In [2]:
path = Path("data/content/")

In [3]:
df = pd.read_csv(path / "labels.csv")
df.head(100)

Unnamed: 0,SOPInstanceUID,Label
0,1.2.246.512.1002.1058214001.1267878993.1650415...,normal
1,2.25.4669378111734520225376815262123197003,normal
2,1.2.246.512.1.2.0.4.397070732735579.1755416012...,normal
3,2.25.253986991488355213316410237483051286906,normal
4,2.25.61269522713157549205486841880699641492,normal
...,...,...
95,2.25.95301085161359068904812759023575525475,abnormal
96,1.2.246.512.1.2.0.4.120699027219781.1785915121...,normal
97,2.25.40752906400399014196156034336759107300,normal
98,1.2.246.512.1002.3876311061.1152214383.7426756...,normal


In [4]:
len(df.loc[df['Label'] == "abnormal"])

74

In [6]:
def image_generator(df, batch_size):
    num_samples = len(df)
    steps_per_epoch = num_samples // batch_size
    while True:
        for i in range(steps_per_epoch):
            batch_df = df[i*batch_size:(i+1)*batch_size]
            batch_images = []
            batch_labels = []
            for idx, (sop, lbl) in batch_df.iterrows():
                ds = pydicom.dcmread(path / f"DICOM/{sop}.dcm")
                img = ds.pixel_array.astype(float)
                cropped = img[200:1300, 500:2700]
                exposures_image = exposure.adjust_gamma(cropped, gamma=0.8)
                cropped_norm = exposure.rescale_intensity(exposures_image, in_range='image', out_range=(0, 255))
                resized_img = cv2.resize(cropped_norm, (1100, 550)) / 255.
                label = 1 if lbl == "abnormal" else 0
                batch_images.append(resized_img)
                batch_labels.append(label)

            yield np.array(batch_images), np.array(batch_labels), batch_size

In [None]:
# Split the dataset into train and validation sets
train_df, test_df = train_test_split(df, test_size=0.15, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42)

# Create data generators for train and validation
batch_size = 8
train_data_generator = image_generator(train_df, batch_size)
val_data_generator = image_generator(val_df, batch_size)

In [14]:
# Preprocessing
inputs = Input(shape=(550, 1100, 1))
x = Conv2D(3, 1, 1, padding='same')(inputs)
x = tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255)(x)  # Normalize pixel values between 0 and 1

# Base model
base_model = ResNet50(input_shape=(550, 1100, 3),
                      include_top=False,
                      weights='imagenet')
base_model.trainable = False
x = base_model(x)

# Global average pooling and regularization
x = GlobalAveragePooling2D()(x)
x = Dropout(0.2)(x)

# Additional dense layer
x = Dense(256, activation='relu')(x)

# Output layer
outputs = Dense(1)(x)

# Create model
model = Model(inputs, outputs)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.0005),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['mae', 'acc'])

# Print model summary
model.summary()

# Callbacks
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=8,mode='min', verbose=1)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 550, 1100, 1)]    0         
                                                                 
 conv2d (Conv2D)             (None, 550, 1100, 3)      6         
                                                                 
 rescaling (Rescaling)       (None, 550, 1100, 3)      0         
                                                                 
 resnet50 (Functional)       (None, 18, 35, 2048)      23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                             

  super().__init__(name, **kwargs)


In [108]:
# Training
steps_per_epoch = len(train_df) // batch_size
validation_steps = len(val_df) // batch_size
history = model.fit(train_data_generator, 
                    steps_per_epoch=steps_per_epoch,
                    validation_data=val_data_generator,
                    validation_steps=validation_steps,
                    epochs=20,
                    callbacks=[checkpoint, early_stopping])

In [16]:
loaded_model = tf.keras.models.load_model("best_model.h5")
loaded_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 550, 1100, 1)]    0         
                                                                 
 conv2d (Conv2D)             (None, 550, 1100, 3)      6         
                                                                 
 rescaling (Rescaling)       (None, 550, 1100, 3)      0         
                                                                 
 resnet50 (Functional)       (None, 18, 35, 2048)      23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                             

In [17]:
test_generator = image_generator(test_df, batch_size)
evaluation = loaded_model.evaluate(test_generator, steps=len(test_df) // batch_size)
evaluation



[0.2858302891254425, 2.6707539558410645, 0.91756272315979]