In [138]:
import os
import cv2
import numpy as np 
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.regularizers import l2
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

In [139]:
# Root folder of the dataset; later cells read its "train" and "test" subfolders.
baseDir = "./datasetBinaryTumor/"
# Last expression of the cell: lists the folder contents as the cell output.
os.listdir(baseDir)

['.DS_Store', 'test', 'train']

In [140]:
import tensorflow as tf

# Load training, validation and test datasets.
# Options shared by every split live in one dict so the three loaders cannot drift apart.
loader_options = dict(
    labels="inferred",
    label_mode="categorical",  # one-hot labels; the model head is a 2-unit softmax
    image_size=(224, 224),
    batch_size=32,
)
# Train and validation are both carved out of the "train" folder, using the same
# split fraction and seed for the two subsets.
split_options = dict(validation_split=0.2, seed=42, shuffle=True)

train_ds = tf.keras.utils.image_dataset_from_directory(
    baseDir + "train", subset="training", **split_options, **loader_options
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    baseDir + "train", subset="validation", **split_options, **loader_options
)

# Test dataset lives in its own directory; no shuffling so the file order stays fixed.
test_ds = tf.keras.utils.image_dataset_from_directory(
    baseDir + "test", shuffle=False, **loader_options
)

# Capture the inferred class order now, before later cells rebind train_ds.
# (A bare `train_ds.class_names` in the middle of a cell is not displayed.)
class_names = train_ds.class_names
print(f"Class names: {class_names}")

# Sanity-check the shape of one batch.
for images, labels in train_ds.take(1):
    print(f"Images shape: {images.shape}")
    print(f"Labels shape: {labels.shape}")

Found 6400 files belonging to 2 classes.
Using 5120 files for training.
Found 6400 files belonging to 2 classes.
Using 1280 files for validation.
Found 1600 files belonging to 2 classes.
Images shape: (32, 224, 224, 3)
Labels shape: (32, 2)


### Dataset normalization

In [141]:
# Scale pixel values from [0, 255] to [0, 1].
# NOTE(review): VGG16 ImageNet weights are normally paired with
# tf.keras.applications.vgg16.preprocess_input rather than 1/255 scaling — confirm
# that plain rescaling is intentional here.
normalization_layer = tf.keras.layers.Rescaling(1./255)

# Random flips and zooms, applied to the training split only.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomZoom(0.2)
])


def _augment_and_normalize(images, labels):
    """Augment a training batch, then rescale it; labels pass through unchanged."""
    return normalization_layer(data_augmentation(images, training=True)), labels


def _normalize(images, labels):
    """Rescale an evaluation batch; labels pass through unchanged."""
    return normalization_layer(images), labels


# WARNING: this cell rebinds train_ds/val_ds/test_ds, so running it twice rescales
# the images twice. Re-run the dataset-loading cell first if this cell must be re-run.
# num_parallel_calls lets tf.data preprocess several batches concurrently instead of
# one at a time; element order is unchanged.
train_ds = train_ds.map(_augment_and_normalize, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(_normalize, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(_normalize, num_parallel_calls=tf.data.AUTOTUNE)

In [142]:
AUTOTUNE = tf.data.AUTOTUNE
# Attach a prefetch stage (buffer size chosen by tf.data) to each of the three splits.
# The right-hand side is fully evaluated before the names are rebound.
train_ds, val_ds, test_ds = [
    split.prefetch(buffer_size=AUTOTUNE) for split in (train_ds, val_ds, test_ds)
]

### Load VGG16 CNN architecture

In [143]:
# VGG16 convolutional base without its original classification head;
# the input size matches the 224x224 RGB images produced by the dataset loaders.
vgg_16 = tf.keras.applications.VGG16(
    include_top=False,
    input_shape=(224, 224, 3),
)

### Create model

In [144]:
# Assemble the classifier layer by layer: VGG16 base followed by a small dense head.
model = Sequential()
model.add(tf.keras.Input(shape=(224, 224, 3)))  # explicit input layer
model.add(vgg_16)
model.add(Flatten())
# Hidden layer with L2 weight penalty
model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
# Final classification layer: one softmax unit per class
model.add(Dense(2, activation='softmax'))

# Build explicitly with a batch dimension of None so the input shape is fixed up front.
model.build(input_shape=(None, 224, 224, 3))

In [145]:
# Categorical cross-entropy pairs with the one-hot labels (label_mode="categorical")
# and the 2-unit softmax output defined above.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

### Train model

In [146]:
# Train on the augmented training split and validate after every epoch.
# NOTE(review): the recorded output below this cell shows "Epoch 1/2", i.e. it was
# produced by a 2-epoch run; re-run the cell or adjust `epochs` so code and output agree.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
)

Epoch 1/2
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1171s[0m 7s/step - accuracy: 0.8654 - loss: 0.4880 - val_accuracy: 0.9930 - val_loss: 0.3882
Epoch 2/2
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 3s/step - accuracy: 0.9569 - loss: 0.3922 - val_accuracy: 0.9898 - val_loss: 0.3696


### Evaluate model

In [148]:
# Persist the trained model to disk before running the evaluation cells below.
model.save("VGG16_tumornotumor.keras")

In [149]:
# Plot training and validation accuracy and loss
def plot_metrics(history, save_dir):
    """Write accuracy and loss curves for a training run as PNG files.

    Args:
        history: object whose ``history`` attribute is a dict holding the
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' series.
        save_dir: output directory; created if it does not exist. Receives
            'accuracy_plot.png' and 'loss_plot.png'.
    """
    os.makedirs(save_dir, exist_ok=True)

    # (train key, train legend, validation key, validation legend, title, y label, file name)
    curve_specs = (
        ('accuracy', 'Train Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Accuracy over Epochs', 'Accuracy', 'accuracy_plot.png'),
        ('loss', 'Train Loss', 'val_loss', 'Validation Loss',
         'Loss over Epochs', 'Loss', 'loss_plot.png'),
    )

    for train_key, train_legend, val_key, val_legend, title, y_label, file_name in curve_specs:
        plt.figure(figsize=(8, 6))
        plt.plot(history.history[train_key], label=train_legend)
        plt.plot(history.history[val_key], label=val_legend)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
        plt.savefig(os.path.join(save_dir, file_name))
        # Close the figure so nothing accumulates or renders inline.
        plt.close()

# Write the accuracy/loss curves of the training run above into output/graphs.
plot_metrics(history, 'output/graphs')

In [150]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, roc_curve, auc, precision_recall_fscore_support, matthews_corrcoef, cohen_kappa_score, balanced_accuracy_score, jaccard_score, log_loss, fbeta_score
)

from tensorflow.keras.models import load_model

# Ensure the results directory exists
results_dir = "binaryVGG16TUMORNOTUMOR"
os.makedirs(results_dir, exist_ok=True)

# Save training metrics (loss and accuracy)
def save_training_metrics(history, results_dir):
    """Save training/validation curves and a per-epoch text report.

    Writes 'training_accuracy.png', 'training_loss.png' and
    'training_validation_metrics.txt' into ``results_dir``.

    Args:
        history: object whose ``history`` attribute is a dict holding the
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' series.
        results_dir: output directory; created if it does not exist, so the
            function no longer depends on a directory made elsewhere.
    """
    os.makedirs(results_dir, exist_ok=True)
    metrics = history.history

    # (train key, validation key, train legend, validation legend,
    #  title, y label, legend location, file name)
    curve_specs = (
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
         'Model Accuracy', 'Accuracy', 'lower right', "training_accuracy.png"),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'Model Loss', 'Loss', 'upper right', "training_loss.png"),
    )
    for train_key, val_key, train_legend, val_legend, title, y_label, legend_loc, file_name in curve_specs:
        plt.figure(figsize=(10, 5))
        plt.plot(metrics[train_key], label=train_legend)
        plt.plot(metrics[val_key], label=val_legend)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend(loc=legend_loc)
        plt.savefig(os.path.join(results_dir, file_name))
        plt.close()

    # Save training and validation metrics in a text file, one stanza per epoch.
    report_path = os.path.join(results_dir, "training_validation_metrics.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("Training and Validation Metrics Per Epoch\n")
        f.write("=" * 50 + "\n")
        per_epoch = zip(
            metrics['accuracy'], metrics['val_accuracy'],
            metrics['loss'], metrics['val_loss'],
        )
        # Epochs are reported 1-based.
        for epoch, (acc, val_acc, loss, val_loss) in enumerate(per_epoch, start=1):
            f.write(f"Epoch {epoch}:\n")
            f.write(f"  Training Accuracy: {acc:.4f}, Validation Accuracy: {val_acc:.4f}\n")
            f.write(f"  Training Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}\n")
            f.write("-" * 50 + "\n")

# Generate confusion matrix
def save_confusion_matrix(y_true, y_pred, class_names, results_dir):
    """Save a confusion-matrix heatmap as 'confusion_matrix.png' in ``results_dir``.

    Args:
        y_true: true labels, one per sample.
        y_pred: predicted labels, one per sample.
        class_names: tick labels for the heatmap axes, ordered by class index.
        results_dir: output directory; created if it does not exist.
    """
    os.makedirs(results_dir, exist_ok=True)

    # When the labels are class indices 0..len(class_names)-1, pin the matrix to that
    # full label set. Otherwise a class that never occurs in y_true/y_pred would be
    # dropped from the matrix and the tick labels would no longer line up with it.
    # Any other kind of label keeps the original behavior (labels inferred from data).
    expected_labels = np.arange(len(class_names))
    observed = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    labels_are_class_indices = (
        observed.size > 0
        and np.issubdtype(observed.dtype, np.integer)
        and bool(np.isin(observed, expected_labels).all())
    )
    cm = confusion_matrix(
        y_true, y_pred, labels=expected_labels if labels_are_class_indices else None
    )

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig(os.path.join(results_dir, "confusion_matrix.png"))
    plt.close()

# Generate ROC curve
def save_roc_curve(y_true, y_probs, results_dir):
    """Plot the ROC curve and save it as 'roc_curve.png' in ``results_dir``.

    Args:
        y_true: true binary labels, one per sample.
        y_probs: score for the positive class, one per sample.
        results_dir: existing directory that receives the figure.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_probs)
    area = auc(false_pos_rate, true_pos_rate)
    curve_label = f"ROC Curve (AUC = {area:.2f})"
    output_path = os.path.join(results_dir, "roc_curve.png")

    plt.figure(figsize=(10, 6))
    plt.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2, label=curve_label)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.savefig(output_path)
    plt.close()

def save_classification_metrics(y_true, y_pred, y_probs, results_dir, class_names):
    """Compute binary classification metrics and write them to a text report.

    Writes 'classification_metrics.txt' into ``results_dir``.

    Args:
        y_true: true class indices (0 = negative, 1 = positive), one per sample.
        y_pred: predicted class indices, one per sample.
        y_probs: predicted probability of the positive class (index 1), one per sample.
        results_dir: output directory; created if it does not exist.
        class_names: accepted for signature compatibility; not used by this function.
    """
    os.makedirs(results_dir, exist_ok=True)
    binary_labels = [0, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')

    # Pin the label set so the matrix is always 2x2. Without it, data containing a
    # single class yields a 1x1 matrix and the 4-way unpacking below raises.
    cm = confusion_matrix(y_true, y_pred, labels=binary_labels)
    tn, fp, fn, tp = cm.ravel()
    # Report NaN explicitly when a class has no samples, instead of dividing by zero.
    sensitivity = tp / (tp + fn) if (tp + fn) else float("nan")  # Sensitivity = Recall
    specificity = tn / (tn + fp) if (tn + fp) else float("nan")  # Specificity

    # Compute additional metrics
    mcc = matthews_corrcoef(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    balanced_acc = balanced_accuracy_score(y_true, y_pred)
    jaccard = jaccard_score(y_true, y_pred, average='binary')
    # log_loss cannot infer the label set from single-class y_true; state it explicitly.
    logloss = log_loss(y_true, y_probs, labels=binary_labels)
    fbeta = fbeta_score(y_true, y_pred, beta=0.5)  # Example for F0.5-score

    # Compute AUC
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    roc_auc = auc(fpr, tpr)

    # Save metrics to a text file, one "name: value" line per metric.
    report_lines = (
        ("Precision", precision),
        ("Recall (Sensitivity)", recall),
        ("F1-Score", f1),
        ("Sensitivity", sensitivity),
        ("Specificity", specificity),
        ("AUC", roc_auc),
        ("Matthews Correlation Coefficient (MCC)", mcc),
        ("Cohen's Kappa", kappa),
        ("Balanced Accuracy", balanced_acc),
        ("Jaccard Index (IoU)", jaccard),
        ("Log Loss", logloss),
        ("F0.5-Score", fbeta),
    )
    with open(os.path.join(results_dir, "classification_metrics.txt"), "w", encoding="utf-8") as f:
        for metric_name, value in report_lines:
            f.write(f"{metric_name}: {value:.4f}\n")

# Evaluate and save all metrics
def save_model_metrics(model, test_ds, results_dir, class_names):
    """Evaluate ``model`` on ``test_ds`` and save every report into ``results_dir``.

    Writes 'testing_metrics.txt' and the confusion matrix; for two-class problems it
    also writes the ROC curve and the binary classification metrics.

    Args:
        model: compiled model exposing ``evaluate`` (returning loss and accuracy)
            and ``predict_on_batch``.
        test_ds: iterable of ``(images, one_hot_labels)`` batches.
        results_dir: output directory; created if it does not exist.
        class_names: class names ordered by class index.
    """
    os.makedirs(results_dir, exist_ok=True)

    # Evaluate the model
    print("Evaluating the model on test data...")
    test_loss, test_accuracy = model.evaluate(test_ds)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Save test metrics to a text file
    with open(os.path.join(results_dir, "testing_metrics.txt"), "w", encoding="utf-8") as f:
        f.write(f"Test Loss: {test_loss:.4f}\n")
        f.write(f"Test Accuracy: {test_accuracy:.4f}\n")

    # Collect labels and predictions in ONE pass over the dataset. Iterating it twice
    # (once for labels, once for predictions) silently misaligns the two whenever the
    # dataset yields its elements in a different order on each iteration.
    label_batches, prob_batches = [], []
    for images, labels in test_ds:
        label_batches.append(np.asarray(labels))
        prob_batches.append(np.asarray(model.predict_on_batch(images)))
    y_probs = np.concatenate(prob_batches, axis=0)
    y_true = np.argmax(np.concatenate(label_batches, axis=0), axis=1)  # one-hot -> class index
    y_pred = np.argmax(y_probs, axis=1)

    # Save confusion matrix
    save_confusion_matrix(y_true, y_pred, class_names, results_dir)

    if len(class_names) == 2:
        # ROC curve and the binary-averaged metrics are only valid for binary
        # classification; column 1 holds the positive-class probability.
        save_roc_curve(y_true, y_probs[:, 1], results_dir)
        save_classification_metrics(y_true, y_pred, y_probs[:, 1], results_dir, class_names)
    else:
        print("Skipping ROC curve and binary classification metrics: not a two-class problem.")

# Example usage:
# `history` is the object returned by `model.fit()` in the training cell,
# and `test_ds` is the test dataset prepared in the cells above.
save_training_metrics(history, results_dir)
# NOTE(review): class_names is hard-coded here; its order must match the class indices
# inferred when the datasets were loaded — confirm against the loader's class_names.
save_model_metrics(model, test_ds, results_dir, class_names=["notumor", "tumor"])

Evaluating the model on test data...
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 460ms/step - accuracy: 0.9945 - loss: 0.1366
Test Loss: 0.3596
Test Accuracy: 0.9887
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 551ms/step
