In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import os
import numpy as np
from PIL import Image
import pydicom

2023-11-06 12:07:14.181951: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-06 12:07:14.277229: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-06 12:07:14.547901: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-06 12:07:14.547973: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-06 12:07:14.550822: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [4]:
# Read the label table: one row per image, with its file stem and its target label
dataframe = pd.read_csv('train.csv')
image_names, targets = (
    dataframe[column].values for column in ('image_name', 'target')
)

In [5]:
# Hold out 20% of the rows for validation, with a fixed seed for reproducibility.
# NOTE(review): the cached X_*_images.npy arrays used later appear to have been
# built from this exact ordering — regenerate them if test_size or random_state change.
split_parts = train_test_split(
    image_names,
    targets,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Training-time generator: scales pixel values by 1/255 and applies random
# rotation, shifts, shear, zoom and horizontal flips to each image.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',  # fill pixels exposed by a transform with the nearest edge value
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [7]:
# Evaluation-time generator: only the 1/255 scaling, no random augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [8]:
# Function to load and preprocess images
def load_dcm_image(file_path):
    """Load one DICOM image and resize it to 224x224.

    Parameters
    ----------
    file_path : str
        Path to the DICOM file WITHOUT its extension; ".dcm" is appended here.

    Returns
    -------
    numpy.ndarray
        The pixel data after resizing to 224x224 with PIL's default resampling.
        NOTE(review): channel layout and dtype are whatever pydicom returns for
        the file — presumably RGB for this dataset; confirm.
    """
    dataset = pydicom.dcmread(file_path + ".dcm")
    # Go through PIL so the resize works on the decoded pixel data
    resized = Image.fromarray(dataset.pixel_array).resize((224, 224))
    return np.array(resized)

In [9]:
# Directory containing the DICOM images
image_directory = 'train/'

# Convert image names to full paths (load_dcm_image appends the ".dcm" suffix itself)
X_train_paths = [os.path.join(image_directory, fname) for fname in X_train]
X_test_paths = [os.path.join(image_directory, fname) for fname in X_test]


def _load_or_build_image_cache(cache_file, paths):
    """Return the image array stored in ``cache_file``, building it first if needed.

    Decoding every DICOM is slow, so the stacked array is saved with ``np.save``
    the first time and simply reloaded on later runs. This keeps the notebook
    runnable from a fresh checkout where the .npy files do not exist yet.

    Parameters
    ----------
    cache_file : str
        Path of the .npy file used as the cache.
    paths : list of str
        Extension-less DICOM paths, in the order the rows must appear in the array.

    Returns
    -------
    numpy.ndarray
        One entry per path, in the same order as ``paths``.
    """
    if os.path.exists(cache_file):
        return np.load(cache_file)
    images = np.array([load_dcm_image(path) for path in paths])
    np.save(cache_file, images)
    return images


X_train_images = _load_or_build_image_cache('X_train_images.npy', X_train_paths)
X_test_images = _load_or_build_image_cache('X_test_images.npy', X_test_paths)

# A cache built from a different split would silently pair images with the wrong
# labels; a length mismatch is the cheapest symptom to detect.
assert len(X_train_images) == len(y_train), "X_train_images.npy is stale: delete it and re-run"
assert len(X_test_images) == len(y_test), "X_test_images.npy is stale: delete it and re-run"

In [10]:
# Build the model: ImageNet-pretrained MobileNetV2 backbone followed by a small dense head.
# NOTE(review): Keras documents mobilenet_v2.preprocess_input as scaling inputs to [-1, 1],
# while the generators in this notebook rescale to [0, 1] — confirm this is intended.
base_model = MobileNetV2(include_top=False, weights='imagenet')  # backbone without its classifier
x = GlobalAveragePooling2D()(base_model.output)  # pool the backbone's feature maps to one vector per image
x = Dense(1024, activation='relu')(x)  # hidden fully connected layer
embeddings = Dense(128, activation='relu')(x)  # 128-unit layer whose activations serve as embeddings
predictions = Dense(1, activation='sigmoid')(embeddings)  # single sigmoid output

model = Model(inputs=base_model.input, outputs=predictions)



2023-11-06 12:07:25.267833: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-06 12:07:25.270111: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [11]:
# Mark every backbone layer as non-trainable so that only the new dense head
# is updated during training. This must run before the model is compiled.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [12]:
# Compile for binary classification
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Checkpoint callback: overwrites best_model.h5 only when the monitored metric
# improves (the monitor is left at the Keras default)
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    save_best_only=True,
    verbose=1,
)

# Keep a copy of the model as it is before any training
model.save('pretrain_model.h5')

  saving_api.save_model(


In [14]:
# Sanity check: show the dimensions of the validation image array loaded from the .npy cache
X_test_images.shape

(6626, 224, 224, 3)

In [None]:
# Configure the generators to use the image data and the labels
BATCH_SIZE = 32

# Training batches are augmented and reshuffled every epoch (flow() shuffles by default)
train_generator = train_datagen.flow(
    X_train_images, y_train, batch_size=BATCH_SIZE
)

# shuffle=False: every epoch is validated on the same samples in the same order,
# so the val_loss values that ModelCheckpoint(save_best_only=True) compares are
# computed on identical data.
validation_generator = test_datagen.flow(
    X_test_images, y_test, batch_size=BATCH_SIZE, shuffle=False
)

# Train the model.
# steps_per_epoch / validation_steps are deliberately omitted: the generators know
# their own length, so Keras runs over every batch including the final partial one.
# Flooring with `len(...) // 32` dropped the remainder samples each epoch.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    callbacks=[checkpoint]
)

# Weights after the last epoch (not necessarily the best ones — those are in best_model.h5)
model.save('posttrain_model.h5')

In [None]:
# Reload the checkpoint file written by the ModelCheckpoint callback
best_model = load_model(filepath='best_model.h5')

In [None]:
best_model.summary() # get layer name of 2nd to last layer (layer before predictions)

In [None]:
# Select the embedding layer by position rather than by a hard-coded name.
# Keras auto-numbers unnamed layers per kernel session ('dense_1', 'dense_4', ...),
# so a literal name stops matching after a kernel restart or after re-running the
# cell that builds the model. The 2nd-to-last layer is the one right before the
# final prediction layer.
embedding_layer_name = best_model.layers[-2].name

embedding_model = Model(
    inputs=best_model.input,
    outputs=best_model.get_layer(embedding_layer_name).output,
)

# Save the embedding model
embedding_model.save('embedding_model.h5')


In [None]:
# Extract and save embeddings.
#
# Two things matter for the saved arrays to be usable:
#   * shuffle=False — flow() shuffles by default, which would return the rows in a
#     random order that no longer lines up with X_train / y_train (or X_test / y_test).
#   * test_datagen for BOTH sets — it only rescales, so the embeddings are
#     deterministic; train_datagen would embed randomly augmented images.
train_embeddings = embedding_model.predict(
    test_datagen.flow(X_train_images, batch_size=32, shuffle=False)
)
test_embeddings = embedding_model.predict(
    test_datagen.flow(X_test_images, batch_size=32, shuffle=False)
)

# One embedding per image, in the original row order
assert len(train_embeddings) == len(X_train_images)
assert len(test_embeddings) == len(X_test_images)

# Save the embeddings
np.save('train_embeddings.npy', train_embeddings)
np.save('test_embeddings.npy', test_embeddings)
