In [1]:
import tensorflow as tf
import keras
import numpy as np
import os
import json
import cv2
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

2024-04-05 14:15:51.206631: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Directories that hold the .npz samples for each class.
# NOTE(review): these are absolute, machine-specific paths — adjust them when
# running the notebook on a different host.
cancer_folder = '/raid/mpsych/OMAMA/DATA/data/2d_512_cancer'
noncancer_folder = '/raid/mpsych/OMAMA/DATA/data/2d_512_Noncancer'

In [3]:
# Collect the full path of every .npz file in each class folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep
# the file lists (and any slice taken from them later) reproducible across runs.
cancer_files = [
    os.path.join(cancer_folder, name)
    for name in sorted(os.listdir(cancer_folder))
    if name.endswith('.npz')
]
noncancer_files = [
    os.path.join(noncancer_folder, name)
    for name in sorted(os.listdir(noncancer_folder))
    if name.endswith('.npz')
]

In [4]:
# Sanity check: number of cancer .npz files found on disk.
len(cancer_files)

7351

In [5]:
# Sanity check: number of non-cancer .npz files found on disk (before any subsampling).
len(noncancer_files)

154238

In [6]:
# Balance the classes: keep only as many non-cancer files as there are cancer
# files. The count is derived from cancer_files rather than hard-coded so it
# cannot go stale when the dataset changes.
noncancer_files = noncancer_files[:len(cancer_files)]

In [7]:
# Confirm the non-cancer list was shortened by the slice in the previous cell.
len(noncancer_files)

7351

In [8]:
# Pair the i-th cancer path with the i-th non-cancer path, then split the pairs:
# 70% go to training and the remaining 30% are halved into validation and test.
# NOTE(review): each element of the resulting lists is a
# (cancer_path, noncancer_path) tuple without labels — confirm this is intended.
_paired_files = list(zip(cancer_files, noncancer_files))
train_files, _holdout_files = train_test_split(
    _paired_files,
    random_state=42,
    test_size=0.3,
)
val_files, test_files = train_test_split(
    _holdout_files,
    random_state=42,
    test_size=0.5,
)

In [9]:
# Number of (cancer, non-cancer) path pairs in the training split.
len(train_files)

5145

In [10]:
# Number of (cancer, non-cancer) path pairs in the test split.
len(test_files)

1103

In [11]:
# Number of (cancer, non-cancer) path pairs in the validation split.
len(val_files)

1103

In [12]:
def load_npz_files(folder, label=None, max_files=None):
    """Load the ``'data'`` array from every ``.npz`` file in ``folder``.

    Parameters
    ----------
    folder : str
        Directory to scan. Only entries whose name ends with ``.npz`` are read.
    label : int, optional
        Label assigned to every loaded sample. When omitted, the label is
        inferred as before: 1 if ``folder`` equals the module-level
        ``cancer_folder``, otherwise 0.
    max_files : int, optional
        If given, only the first ``max_files`` files (in sorted name order)
        are loaded. ``None`` loads everything.

    Returns
    -------
    features : list
        One array per loaded file, taken from the archive's ``'data'`` key.
    labels : list of int
        One label per loaded file, in the same order as ``features``.
    """
    if label is None:
        # Backward-compatible default: label depends on which folder was passed.
        label = 1 if folder == cancer_folder else 0

    # os.listdir order is arbitrary; sorting makes the sample order (and the
    # max_files subset) deterministic.
    names = sorted(name for name in os.listdir(folder) if name.endswith('.npz'))
    if max_files is not None:
        names = names[:max_files]

    features = []
    for name in names:
        # Use the archive as a context manager so its file handle is closed
        # right after the array has been read into memory.
        with np.load(os.path.join(folder, name)) as archive:
            features.append(archive['data'])

    labels = [label] * len(features)
    return features, labels

In [13]:
# cancer, cancer_labels = load_npz_files(cancer_folder)
# noncancer, noncancer_labels = load_npz_files(noncancer_folder)

In [None]:
# Load NPZ files
# NOTE(review): this reads every .npz file in both folders, so the balanced
# `noncancer_files` subset built earlier in the notebook is not used here and
# the two classes will have very different sizes — confirm this is intended.
cancer_features, cancer_labels = load_npz_files(cancer_folder)
noncancer_features, noncancer_labels = load_npz_files(noncancer_folder)

# Combine cancer and non-cancer data
X = np.concatenate([cancer_features, noncancer_features], axis=0)
y = np.concatenate([cancer_labels, noncancer_labels], axis=0)

# The Dense input layer below expects one flat feature vector per sample, so
# collapse any extra per-sample dimensions. This is a no-op when each sample is
# already 1-D. (Assumes samples may be multi-dimensional, e.g. images — TODO confirm.)
X = X.reshape(len(X), -1)

# Split the data into training (70%), validation (15%), and test (15%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# Define the model: a small fully connected binary classifier.
# `models` and `layers` were never imported in this notebook, so the classes are
# reached through the already-imported `keras` package instead.
model = keras.models.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define callbacks: stop once val_loss has not improved for 5 epochs and roll
# back to the best weights. EarlyStopping comes from the notebook's import cell.
early_stopping = EarlyStopping(patience=5, monitor='val_loss', restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model on the held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test)

print("Test Accuracy:", test_acc)

# Plot training history
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()