In [1]:
import numpy as np
import tensorflow as tf
from skimage.transform import resize
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.src import layers
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from keras.callbacks import EarlyStopping
import os

In [4]:
#Preparation of the data, Y component

#Because the data is stored in a single .txt file with consistent formatting, we can just parse it.
#Each line is expected to start with a 6-character image id; the character at
#SEVERITY_COLUMN is read as the severity flag ('B' or 'M').
#NOTE(review): the column position is taken from the original script - confirm against the file layout.

text_path_base = 'C:/ML Data/MIAS/MIASduplicatesRemoved.txt' #Hardcoded path. Modify if needed
image_path_base = 'C:/ML Data/MIAS/all-mias'

SEVERITY_COLUMN = 14                  #index of the severity flag inside a line
SEVERITY_TO_LABEL = {'B': 0, 'M': 1}  #'B' -> class 0, 'M' -> class 1; any other flag is skipped

tempDataArray = []
tempImageArray = []

with open(text_path_base) as f:
  f.readline() #skip first line
  for line in f:
    filename = line[0:6] + ".pgm"
    imagefilepath = os.path.join(image_path_base, filename)

    #Entries whose image is missing on disk are ignored
    if not os.path.isfile(imagefilepath):
      continue

    #A line too short to carry a severity flag is reported and skipped.
    #(This replaces a bare `except:` that also swallowed unrelated errors.)
    if len(line) <= SEVERITY_COLUMN:
      print(line)
      continue

    label = SEVERITY_TO_LABEL.get(line[SEVERITY_COLUMN])
    if label is not None:
      tempDataArray.append(label)
      tempImageArray.append(imagefilepath)

#Convert to arrays so they can be sliced and fancy-indexed by the data generator
tempImageArray = np.array(tempImageArray)
tempDataArray = np.array(tempDataArray)


In [5]:
class DataGenerator:
    """Load images from disk in batches and pair them with their labels.

    Batches can be fetched by index (``gen[i]``) or streamed endlessly with
    ``generate()``, which reshuffles the data at the start of every pass.

    Args:
        image_paths: Sequence of image file paths (list or array).
        labels: Sequence of labels, one per path.
        batch_size: Number of samples per batch; must be >= 1.
        augment: When True, each image in a batch has a 50% chance of
            receiving a random rotation / zoom / horizontal flip.
        target_size: ``(height, width)`` every image is resized and padded to.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or the number of
            paths and labels differ.
    """

    def __init__(self, image_paths, labels, batch_size=32, augment=False,
                 target_size=(600, 600)):
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

        # Arrays (not lists) are required for the fancy indexing in generate().
        self.image_paths = np.asarray(image_paths)
        self.labels = np.asarray(labels)
        if len(self.image_paths) != len(self.labels):
            raise ValueError(
                f"image_paths and labels differ in length: "
                f"{len(self.image_paths)} != {len(self.labels)}"
            )

        self.batch_size = batch_size
        self.augment = augment
        self.target_size = target_size
        # The augmenter is only built when it will actually be used.
        self.datagen = ImageDataGenerator(
            rotation_range=20,
            zoom_range=0.1,
            horizontal_flip=True
        ) if augment else None

    def __len__(self):
        """Return the number of batches per pass (the last one may be partial)."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        """Load and return batch ``idx`` as ``(X, y)``.

        ``X`` stacks the resized images of the batch; ``y`` holds the matching
        labels reshaped to a column (``reshape(-1, 1)``).

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
                Raising here (instead of returning an empty batch) also lets
                plain ``for batch in gen`` iteration terminate.
        """
        num_batches = len(self)
        if idx < 0:
            idx += num_batches
        if not 0 <= idx < num_batches:
            raise IndexError(f"batch index out of range (have {num_batches} batches)")

        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.image_paths[start:stop]
        batch_labels = self.labels[start:stop]

        height, width = self.target_size
        batch_images = []
        for path in batch_paths:
            img = load_img(path, color_mode='rgb')
            img_tensor = tf.convert_to_tensor(img_to_array(img))
            # Resize to a common size so the batch can be stacked into one array.
            batch_images.append(tf.image.resize_with_pad(img_tensor, height, width))

        X = np.array(batch_images)
        y = np.array(batch_labels).reshape(-1, 1)

        if self.augment:
            for i in range(len(X)):
                # Each image is augmented independently with probability 0.5.
                if np.random.random() > 0.5:
                    X[i] = self.datagen.random_transform(X[i])

        return X, y

    def generate(self):
        """Yield ``(X, y)`` batches forever, reshuffling before every pass.

        Note: shuffling reorders ``self.image_paths`` / ``self.labels`` in
        place, so later index-based access sees the shuffled order.
        For an empty dataset the generator finishes immediately instead of
        spinning forever without yielding.
        """
        if len(self.image_paths) == 0:
            return

        while True:
            indices = np.random.permutation(len(self.image_paths))
            self.image_paths = self.image_paths[indices]
            self.labels = self.labels[indices]

            for i in range(len(self)):
                yield self[i]

# Non-augmenting generator over the parsed image paths and labels, used for evaluation.
test_gen = DataGenerator(
    image_paths=tempImageArray,
    labels=tempDataArray,
    batch_size=32,
    augment=False,
)

In [6]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
from tensorflow.keras.models import load_model

# Restore the saved Keras model (a complete .keras file, not just weights) for evaluation.
model = load_model(
    r'C:\Users\gabel\Desktop\Models to Evaluate\dropoutandl2andoversampling.keras'
)

def _save_confusion_matrix_plot(cm, class_names, title, path):
    """Draw ``cm`` as an annotated heat map and save the figure to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        ax.set_title(title)
        fig.colorbar(im, ax=ax)

        tick_marks = np.arange(len(class_names))
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(class_names, rotation=45)
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(class_names)

        # Cells darker than half the maximum get white text so counts stay readable.
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, format(cm[i, j], 'd'),
                        ha="center", va="center",
                        color="white" if cm[i, j] > thresh else "black")

        ax.set_ylabel('Actual')
        ax.set_xlabel('Predicted')
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)


def _save_roc_plot(fpr, tpr, roc_auc, title, path):
    """Draw the ROC curve with a chance diagonal and save the figure to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(title)
        ax.legend(loc="lower right")
        fig.savefig(path)
    finally:
        plt.close(fig)


def evaluate_model(model, generator, steps, model_name="Model", cm_path=None, roc_path=None):
    """Evaluate a binary classifier batch by batch and save diagnostic plots.

    Predictions are thresholded at 0.5. A classification report is printed,
    and a confusion-matrix image and a ROC-curve image are written to disk.

    Args:
        model: Object exposing ``predict(batch_x, verbose=0)``.
        generator: Batch source. If it defines ``__getitem__`` batches are
            fetched as ``generator[i]``; otherwise batches are pulled from a
            single ``generator.generate()`` iterator. ``on_epoch_end()`` is
            called first when present.
        steps: Number of batches to evaluate.
        model_name: Label used in printed headers and plot titles.
        cm_path: Output file for the confusion-matrix image. Defaults to the
            notebook's original hard-coded location.
        roc_path: Output file for the ROC-curve image. Defaults to the
            notebook's original hard-coded location.

    Returns:
        dict with keys ``accuracy``, ``auc``, ``y_true``, ``y_pred``,
        ``y_pred_prob`` and ``confusion_matrix``.

    Raises:
        ValueError: If no samples were collected (e.g. ``steps`` is 0).
    """
    if cm_path is None:
        cm_path = r'C:\Users\gabel\Desktop\Models to Evaluate\Graph Images\DropoutANDL2AndOverSamplingCM.png'
    if roc_path is None:
        roc_path = r'C:\Users\gabel\Desktop\Models to Evaluate\Graph Images\DropOutANDL2AndOverSamplingROC.png'

    class_names = ['Benign', 'Malignant']
    class_ids = [0, 1]

    if hasattr(generator, 'on_epoch_end'):
        generator.on_epoch_end()

    # For stream-only generators create the iterator ONCE; calling
    # generate() on every step would restart (and reshuffle) the stream and
    # keep returning the first batch of a fresh pass.
    use_indexing = hasattr(generator, '__getitem__')
    batch_stream = None if use_indexing else generator.generate()

    y_true_batches = []
    y_prob_batches = []
    for i in range(steps):
        if use_indexing:
            batch_x, batch_y = generator[i]
        else:
            batch_x, batch_y = next(batch_stream)

        batch_pred = model.predict(batch_x, verbose=0)

        y_true_batches.append(np.asarray(batch_y).reshape(-1))
        y_prob_batches.append(np.asarray(batch_pred).reshape(-1))

    y_true = np.concatenate(y_true_batches) if y_true_batches else np.array([])
    y_pred_prob = np.concatenate(y_prob_batches) if y_prob_batches else np.array([])
    if y_true.size == 0:
        raise ValueError("No samples were evaluated; check `steps` and the generator.")

    y_pred = (y_pred_prob > 0.5).astype(int)

    # --- Classification Report ---
    # labels= pins both classes so the report matches target_names even when
    # one class is absent from the evaluated samples.
    print(f"\n--- {model_name} Classification Report ---")
    print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

    # --- Confusion Matrix ---
    # labels= guarantees a 2x2 matrix, which the plot annotation relies on.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    _save_confusion_matrix_plot(cm, class_names, f'{model_name} Confusion Matrix', cm_path)
    print(f"Confusion matrix saved to: {cm_path}")

    # --- ROC Curve ---
    fpr, tpr, _ = roc_curve(y_true, y_pred_prob)
    roc_auc = auc(fpr, tpr)
    _save_roc_plot(fpr, tpr, roc_auc, f'{model_name} ROC Curve', roc_path)
    print(f"ROC curve saved to: {roc_path}")

    return {
        'accuracy': (y_pred == y_true).mean(),
        'auc': roc_auc,
        'y_true': y_true,
        'y_pred': y_pred,
        'y_pred_prob': y_pred_prob,
        'confusion_matrix': cm
    }

# Evaluate the loaded model on every batch of test_gen and keep the metrics dict.
results = evaluate_model(
    model=model,
    generator=test_gen,
    steps=len(test_gen),
    model_name="Custom RNN Dropout and L2 And Oversampling",
)

# Headline metrics.
print(f"\n Final Accuracy: {results['accuracy']:.4f}, AUC: {results['auc']:.4f}")

# Raw confusion matrix, printed on the line below its heading.
print("\nConfusion Matrix:", results['confusion_matrix'], sep="\n")


--- Custom RNN Dropout and L2 And Oversampling Classification Report ---
              precision    recall  f1-score   support

      Benign       0.47      0.15      0.23        59
   Malignant       0.44      0.80      0.57        49

    accuracy                           0.44       108
   macro avg       0.46      0.47      0.40       108
weighted avg       0.46      0.44      0.38       108

Confusion matrix saved to: C:\Users\gabel\Desktop\Models to Evaluate\Graph Images\DropoutANDL2AndOverSamplingCM.png
ROC curve saved to: C:\Users\gabel\Desktop\Models to Evaluate\Graph Images\DropOutANDL2AndOverSamplingROC.png

 Final Accuracy: 0.4444, AUC: 0.4905

Confusion Matrix:
[[ 9 50]
 [10 39]]


In [None]:
# Show the architecture summary of the model loaded earlier in the notebook.
model.summary()