In [11]:
import os
import numpy as np
import keras
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping

In [2]:
cancer_folder = '/raid/mpsych/OMAMA/DATA/data/2d_512_cancer'
noncancer_folder = '/raid/mpsych/OMAMA/DATA/data/2d_512_Noncancer'

cancer_files = [os.path.join(cancer_folder, f) for f in os.listdir(cancer_folder) if f.endswith('.npz')]
noncancer_files = [os.path.join(noncancer_folder, f) for f in os.listdir(noncancer_folder) if f.endswith('.npz')]
noncancer_files = noncancer_files[:7351]

In [3]:
len(cancer_files)

7351

In [4]:
len(noncancer_files)

7351

In [5]:
def load_data(file_paths):
    data = []
    for file_path in file_paths:
        with np.load(file_path) as data_file:
            data.append(data_file['data'])
    return np.array(data)


In [6]:
cancer_data = load_data(cancer_files)
noncancer_data = load_data(noncancer_files)

In [7]:
# Assign labels (1 for cancer, 0 for non-cancer)
cancer_labels = np.ones(len(cancer_data))
noncancer_labels = np.zeros(len(noncancer_data))

# Concatenate data and labels
X = np.concatenate((cancer_data, noncancer_data))
y = np.concatenate((cancer_labels, noncancer_labels))

X = np.expand_dims(X, axis=-1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [13]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(512, 512, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [14]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [16]:
# Train the model
model.fit(X_train, y_train, epochs=15, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7efba07e9250>

In [17]:
# Make predictions
predictions = model.predict(X_test)

# Convert probabilities to binary predictions
binary_predictions = np.round(predictions).flatten()

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_acc)

# Print some sample predictions
for i in range(30):
    print("True Label:", y_test[i], "Predicted Label:", binary_predictions[i])


Test Accuracy: 0.5623937249183655
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 1.0
True Label: 1.0 Predicted Label: 1.0
True Label: 0.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 1.0
True Label: 1.0 Predicted Label: 1.0
True Label: 1.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 1.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 1.0
True Label: 0.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 1.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 0.0
True Label: 0.0 Predicted Label: 1.0
True Label: 0.0 Predicted Label: 0.0
True Label: 1.0 Predicted Label: 1.0
True