In [None]:
import os
import numpy as np
import pydicom
import tensorflow as tf
from skimage.transform import resize
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras import layers, optimizers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers.schedules import CosineDecay
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
#from keras.src import layers
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.optimizers.schedules import CosineDecay

from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score

In [6]:

# Paths -- every artefact lives under one CMMD_Clean folder, so each location is
# derived from that single root.
_cmmd_clean_root = r'C:\Users\yaruu\OneDrive\Documents\DIS Copenhagen 2025\Courses\ANN & DL\Final Project\Datasets\CMMD_Clean'

# Image folder (presumably the PNG exports of the dataset -- TODO confirm)
image_path_base = rf'{_cmmd_clean_root}\png'
# CSV read by get_image_paths_and_labels (label in column 1, image path in column 2)
text_path_base = rf'{_cmmd_clean_root}\myver_cleanCropCMMD.csv'
# Weight file passed to model.load_weights
model_weights_path = rf'{_cmmd_clean_root}\weights\best_model_oversampled.h5'

def get_image_paths_and_labels(csv_path=None):
    """Read image paths and binary labels from the dataset CSV.

    The first row is treated as a header and skipped. For every other row,
    column 1 is the class ("0" = benign, "1" = malignant) and column 2 is the
    image path. Rows that are blank, have fewer than three columns, or carry
    any other class value are skipped entirely, so the two returned arrays
    always have the same length and stay aligned index-for-index.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the module-level
            ``text_path_base`` when omitted.

    Returns:
        A tuple ``(image_paths, labels)`` where ``image_paths`` is a 1-D array
        of path strings and ``labels`` is an integer array of shape ``(n, 1)``.

    Side effects:
        Prints the benign and malignant counts.
    """
    import csv  # function-scope import keeps this cell self-contained

    if csv_path is None:
        csv_path = text_path_base

    image_paths = []
    labels = []
    b_count = 0
    m_count = 0

    # newline="" is what the csv module expects for files it parses itself.
    with open(csv_path, newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row (no-op on an empty file)

        for row in reader:
            if len(row) < 3:
                # Blank or truncated line: nothing usable on it.
                continue

            classification = row[1].replace('"', "").strip()
            if classification not in ("0", "1"):
                # Validate the label BEFORE storing the path; otherwise an
                # unknown label would leave a path with no matching label.
                continue

            image_path = row[2].replace("\n", "").replace('"', "").strip()
            image_paths.append(image_path)

            if classification == "0":
                labels.append(0)
                b_count += 1
            else:
                labels.append(1)
                m_count += 1

    print(f"Benign count: {b_count}")
    print(f"Malignant count: {m_count}")
    return np.array(image_paths), np.array(labels, dtype=int).reshape(-1, 1)

# Parse the CSV, then hold out 20% of the samples (stratified on the label,
# fixed seed so the split is repeatable).
image_paths, labels = get_image_paths_and_labels()

paths_train, paths_test, y_train, y_test = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=1,
    stratify=labels,
)

# Per-class head-count of the held-out set
n_test_images = len(paths_test)
n_test_benign = np.sum(y_test == 0)
n_test_malignant = np.sum(y_test == 1)
print(f"Test set: {n_test_images} images (Benign: {n_test_benign}, Malignant: {n_test_malignant})")


Benign count: 1416
Malignant count: 4174
Test set: 1118 images (Benign: 283, Malignant: 835)


In [7]:
class TestDataGenerator:
    """Ordered batch loader for evaluation images and their labels.

    Batches are consecutive slices of ``image_paths`` / ``labels``; nothing is
    shuffled. Each image is loaded as RGB and resized with padding
    (``tf.image.resize_with_pad``) to ``target_size``.

    Args:
        image_paths: Sliceable sequence of image file paths.
        labels: Sliceable sequence of labels aligned with ``image_paths``.
        batch_size: Number of samples per batch.
        augment: Accepted for signature compatibility only; no augmentation
            is applied by this class.
        target_size: ``(height, width)`` every image is resized/padded to.
    """

    def __init__(self, image_paths, labels, batch_size=32, augment=False,
                 target_size=(600, 600)):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.target_size = tuple(target_size)

    def __len__(self):
        """Return the number of batches (the last one may be partial)."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(X, y)``.

        ``X`` stacks the loaded images; ``y`` holds the labels reshaped to
        ``(batch, 1)``.

        Raises:
            IndexError: if ``idx`` is not in ``range(len(self))``. Without
                this, ``for batch in generator`` (which relies on IndexError
                to stop) would yield empty batches forever.
        """
        if not 0 <= idx < len(self):
            raise IndexError(
                f"batch index {idx} out of range for {len(self)} batches"
            )

        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.image_paths[start:stop]
        batch_labels = self.labels[start:stop]

        height, width = self.target_size
        batch_images = []
        for path in batch_paths:
            img_array = img_to_array(load_img(path, color_mode='rgb'))
            padded = tf.image.resize_with_pad(
                tf.convert_to_tensor(img_array), height, width
            )
            # Convert each tensor to an ndarray up front so the final stack
            # is a plain NumPy operation rather than a walk over tensors.
            batch_images.append(np.asarray(padded))

        X = np.array(batch_images)
        y = np.array(batch_labels).reshape(-1, 1)

        return X, y

    def generate(self):
        """Yield every batch once, in order."""
        for i in range(len(self)):
            yield self[i]

# Batches of 32 over the held-out split
test_gen = TestDataGenerator(image_paths=paths_test, labels=y_test, batch_size=32)

In [9]:
# Backbone: ImageNet-initialised ResNet50 without its classifier, frozen
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(600, 600, 3),
)
base_model.trainable = False

# Head: global average pooling -> Dense(256, ReLU, L2 0.001) -> Dropout(0.5) -> sigmoid.
# Layers are created in the same order as before so their auto-generated
# names and the model topology are unchanged.
pooled_features = layers.GlobalAveragePooling2D()(base_model.output)
hidden_features = layers.Dense(
    256,
    activation='relu',
    kernel_regularizer=regularizers.l2(0.001),
)(pooled_features)
x = layers.Dropout(0.5)(hidden_features)
output = layers.Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)

# Binary cross-entropy with Adam (lr 1e-4); track accuracy and AUC
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy", AUC(name='auc')],
)

In [10]:
# Restore the saved weights into the model built above
model.load_weights(filepath=model_weights_path)

In [11]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
from tensorflow.keras.models import load_model

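# Disabled alternative checkpoint (best_model.weights.h5). The weights actually used are loaded from model_weights_path in the preceding cell.
#model.load_weights(r'C:\Users\yaruu\OneDrive\Documents\DIS Copenhagen 2025\Courses\ANN & DL\Final Project\Datasets\CMMD_Clean\weights\best_model.weights.h5')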

# Directory the plots were originally written to; used when no output_dir is given.
_DEFAULT_RESULTS_DIR = r'C:\Users\yaruu\OneDrive\Documents\DIS Copenhagen 2025\Courses\ANN & DL\Final Project\Datasets\CMMD_Clean\result3_oversampled'

_CLASS_NAMES = ['Benign', 'Malignant']


def _collect_predictions(model, generator, steps):
    """Predict on up to `steps` batches; return flat (y_true, y_pred_prob) arrays."""
    if hasattr(generator, '__getitem__'):
        batches = (generator[i] for i in range(steps))
    else:
        # Create the generator ONCE and advance it. Calling
        # next(generator.generate()) per step would restart it every time
        # and evaluate the first batch over and over.
        batches = (batch for _, batch in zip(range(steps), generator.generate()))

    y_true = []
    y_pred_prob = []
    for batch_x, batch_y in batches:
        batch_pred = model.predict(batch_x, verbose=0)
        y_true.extend(batch_y)
        y_pred_prob.extend(batch_pred)

    return np.array(y_true).flatten(), np.array(y_pred_prob).flatten()


def _save_confusion_matrix_plot(cm, model_name, cm_path):
    """Draw the 2x2 confusion matrix with per-cell counts and save it to `cm_path`."""
    plt.figure(figsize=(6, 5))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(f'{model_name} Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(2)
    plt.xticks(tick_marks, _CLASS_NAMES, rotation=45)
    plt.yticks(tick_marks, _CLASS_NAMES)

    # White text on dark cells, black on light ones, split at half the max count.
    thresh = cm.max() / 2.
    for i in range(2):
        for j in range(2):
            plt.text(j, i, format(cm[i, j], 'd'),
                     ha="center", va="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.tight_layout()

    plt.savefig(cm_path)
    print(f"Confusion matrix saved to: {cm_path}")
    plt.close()


def _save_roc_plot(fpr, tpr, roc_auc, model_name, roc_path):
    """Draw the ROC curve against the chance diagonal and save it to `roc_path`."""
    plt.figure(figsize=(6, 5))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'{model_name} ROC Curve')
    plt.legend(loc="lower right")

    plt.savefig(roc_path)
    print(f"ROC curve saved to: {roc_path}")
    plt.close()


def evaluate_model(model, generator, steps, model_name="Model",
                   output_dir=None, file_prefix="oversampled_finished"):
    """Evaluate a binary classifier on batches from `generator`.

    Predictions are thresholded at 0.5. A classification report is printed,
    and a confusion-matrix plot and a ROC-curve plot are saved as PNG files.

    Args:
        model: Object exposing ``predict(batch_x, verbose=0)``.
        generator: Batch source. If it supports indexing, batches
            ``0 .. steps-1`` are read with ``generator[i]``; otherwise
            ``generator.generate()`` is iterated for at most ``steps``
            batches. ``on_epoch_end()`` is called first when present.
        steps: Number of batches to evaluate.
        model_name: Label used in the printed report and plot titles.
        output_dir: Directory for the PNG files; created if missing.
            Defaults to the original ``result3_oversampled`` results folder.
        file_prefix: Prefix of the two PNG file names.

    Returns:
        dict with keys ``accuracy``, ``auc``, ``y_true``, ``y_pred``,
        ``y_pred_prob`` and ``confusion_matrix``.

    Raises:
        ValueError: if the generator produced no samples.
    """
    if hasattr(generator, 'on_epoch_end'):
        generator.on_epoch_end()

    y_true, y_pred_prob = _collect_predictions(model, generator, steps)
    if y_true.size == 0:
        raise ValueError("No samples were produced by the generator; nothing to evaluate.")

    y_pred = (y_pred_prob > 0.5).astype(int)

    if output_dir is None:
        output_dir = _DEFAULT_RESULTS_DIR
    os.makedirs(output_dir, exist_ok=True)

    # --- Classification Report ---
    # labels=[0, 1] keeps the report and matrix two-class even when only one
    # class shows up in the evaluated samples.
    print(f"\n--- {model_name} Classification Report ---")
    print(classification_report(y_true, y_pred, labels=[0, 1], target_names=_CLASS_NAMES))

    # --- Confusion Matrix ---
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    cm_path = os.path.join(output_dir, f"{file_prefix}_confusion_matrix.png")
    _save_confusion_matrix_plot(cm, model_name, cm_path)

    # --- ROC Curve ---
    fpr, tpr, _ = roc_curve(y_true, y_pred_prob)
    roc_auc = auc(fpr, tpr)
    roc_path = os.path.join(output_dir, f"{file_prefix}_roc_curve.png")
    _save_roc_plot(fpr, tpr, roc_auc, model_name, roc_path)

    return {
        'accuracy': (y_pred == y_true).mean(),
        'auc': roc_auc,
        'y_true': y_true,
        'y_pred': y_pred,
        'y_pred_prob': y_pred_prob,
        'confusion_matrix': cm
    }

# Evaluate on every test batch, then print the headline numbers
results = evaluate_model(
    model,
    test_gen,
    steps=len(test_gen),
    model_name="Oversampled_ResNet50",
)

final_accuracy = results['accuracy']
final_auc = results['auc']
print(f"\n Final Accuracy: {final_accuracy:.4f}, AUC: {final_auc:.4f}")

print("\nConfusion Matrix:")
print(results['confusion_matrix'])


--- Oversampled_ResNet50 Classification Report ---
              precision    recall  f1-score   support

      Benign       0.44      0.28      0.34       283
   Malignant       0.78      0.88      0.83       835

    accuracy                           0.73      1118
   macro avg       0.61      0.58      0.59      1118
weighted avg       0.70      0.73      0.71      1118

Confusion matrix saved to: C:\Users\yaruu\OneDrive\Documents\DIS Copenhagen 2025\Courses\ANN & DL\Final Project\Datasets\CMMD_Clean\result3_oversampled\oversampled_finished_confusion_matrix.png
ROC curve saved to: C:\Users\yaruu\OneDrive\Documents\DIS Copenhagen 2025\Courses\ANN & DL\Final Project\Datasets\CMMD_Clean\result3_oversampled\oversampled_finished_roc_curve.png

 Final Accuracy: 0.7281, AUC: 0.6854

Confusion Matrix:
[[ 79 204]
 [100 735]]
