In [None]:
%pip install opencv-python-headless
%pip install seaborn
%pip install matplotlib
%pip install scikit-learn

In [None]:
import tensorflow as tf
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, roc_curve, auc, precision_recall_fscore_support, matthews_corrcoef, cohen_kappa_score, balanced_accuracy_score, jaccard_score, log_loss, fbeta_score
)

# Number of images per batch; passed to every dataset loader in this notebook.
batch_size = 32
# Learning rate handed to the Adam optimizer when the model is compiled.
learning_rate = 0.000001
# Short experiment tag; used to build the results path and the run names.
group_type = "unu"
# Architecture label; used to build the results path and the saved-model file name.
model_name = "ResNet101V2"
# Dataset root. The loaders append "train", "dev" and "test" by plain string
# concatenation, so the trailing slash is required.
baseDir = "./datasets/uti-no-uti/"
# Show the contents of the dataset root as the cell output; raises if the path does not exist.
os.listdir(baseDir)

In [None]:
# Save training metrics (loss and accuracy)
def save_training_metrics(history, results_dir):
    """Write the training curves and a per-epoch text report into ``results_dir``.

    Produces three files: ``training_accuracy.png``, ``training_loss.png`` and
    ``training_validation_metrics.txt``.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss', each holding one
            value per epoch.
        results_dir: Directory the files are written to; it is not created here.
    """
    logs = history.history

    def _save_curve(train_key, val_key, train_label, val_label,
                    title, y_label, legend_loc, filename):
        # One figure per metric: training and validation series on shared axes.
        plt.figure(figsize=(10, 5))
        plt.plot(logs[train_key], label=train_label)
        plt.plot(logs[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend(loc=legend_loc)
        plt.savefig(os.path.join(results_dir, filename))
        plt.close()

    _save_curve('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
                'Model Accuracy', 'Accuracy', 'lower right', "training_accuracy.png")
    _save_curve('loss', 'val_loss', 'Training Loss', 'Validation Loss',
                'Model Loss', 'Loss', 'upper right', "training_loss.png")

    # Plain-text report: a header followed by one entry per epoch.
    report_path = os.path.join(results_dir, "training_validation_metrics.txt")
    with open(report_path, "w") as report:
        report.write("Training and Validation Metrics Per Epoch\n")
        report.write("=" * 50 + "\n")
        per_epoch = zip(logs['accuracy'], logs['val_accuracy'], logs['loss'], logs['val_loss'])
        for epoch, (acc, val_acc, loss, val_loss) in enumerate(per_epoch, start=1):
            report.write(f"Epoch {epoch}:\n")
            report.write(f"  Training Accuracy: {acc:.4f}, Validation Accuracy: {val_acc:.4f}\n")
            report.write(f"  Training Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}\n")
            report.write("-" * 50 + "\n")

# Generate confusion matrix
def save_confusion_matrix(y_true, y_pred, class_names, results_dir):
    """Save a confusion-matrix heatmap as ``confusion_matrix.png`` in ``results_dir``.

    Args:
        y_true: True class indices, where index ``i`` refers to ``class_names[i]``.
        y_pred: Predicted class indices, same convention as ``y_true``.
        class_names: Class names in index order; used as the tick labels.
        results_dir: Directory the image is written to; it is not created here.
    """
    # Pin the label set to every known class index. Without it, a class that is
    # absent from both y_true and y_pred is dropped from the matrix, which then
    # no longer lines up with the class_names tick labels.
    label_indices = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_indices)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig(os.path.join(results_dir, "confusion_matrix.png"))
    plt.close()

# Generate ROC curve
def save_roc_curve(y_true, y_probs, results_dir):
    """Plot the ROC curve with its AUC and save it as ``roc_curve.png``.

    Args:
        y_true: True binary labels.
        y_probs: Scores for the positive class, one per sample.
        results_dir: Directory the image is written to; it is not created here.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_probs)
    area = auc(false_pos_rate, true_pos_rate)
    out_path = os.path.join(results_dir, "roc_curve.png")

    plt.figure(figsize=(10, 6))
    plt.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=2,
        label=f"ROC Curve (AUC = {area:.2f})",
    )
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.savefig(out_path)
    plt.close()

def save_classification_metrics(y_true, y_pred, y_probs, results_dir, class_names):
    """Compute binary classification metrics and write them to a text file.

    The metrics are written to ``classification_metrics.txt`` in ``results_dir``.

    Args:
        y_true: True labels, encoded as 0 (negative) and 1 (positive).
        y_pred: Predicted labels, same encoding as ``y_true``.
        y_probs: Predicted probability of the positive class, one per sample.
        results_dir: Directory the file is written to; it is not created here.
        class_names: Not used by this function; kept so existing callers keep working.
    """
    binary_labels = [0, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')

    # Pin the labels so the matrix is always 2x2. Otherwise a test set in which
    # only one class occurs yields a 1x1 matrix and the unpacking below fails.
    cm = confusion_matrix(y_true, y_pred, labels=binary_labels)
    tn, fp, fn, tp = cm.ravel()

    # Report NaN instead of dividing by zero when a class has no true samples.
    sensitivity = tp / (tp + fn) if (tp + fn) else float("nan")  # Sensitivity = Recall
    specificity = tn / (tn + fp) if (tn + fp) else float("nan")  # Specificity

    # Compute additional metrics
    mcc = matthews_corrcoef(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    balanced_acc = balanced_accuracy_score(y_true, y_pred)
    jaccard = jaccard_score(y_true, y_pred, average='binary')
    # log_loss raises when y_true holds a single label unless the labels are given.
    logloss = log_loss(y_true, y_probs, labels=binary_labels)
    fbeta = fbeta_score(y_true, y_pred, beta=0.5)  # Example for F0.5-score

    # Compute AUC
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    roc_auc = auc(fpr, tpr)

    # Save metrics to a text file
    with open(os.path.join(results_dir, "classification_metrics.txt"), "w") as f:
        f.write(f"Precision: {precision:.4f}\n")
        f.write(f"Recall (Sensitivity): {recall:.4f}\n")
        f.write(f"F1-Score: {f1:.4f}\n")
        f.write(f"Sensitivity: {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"AUC: {roc_auc:.4f}\n")
        f.write(f"Matthews Correlation Coefficient (MCC): {mcc:.4f}\n")
        f.write(f"Cohen's Kappa: {kappa:.4f}\n")
        f.write(f"Balanced Accuracy: {balanced_acc:.4f}\n")
        f.write(f"Jaccard Index (IoU): {jaccard:.4f}\n")
        f.write(f"Log Loss: {logloss:.4f}\n")
        f.write(f"F0.5-Score: {fbeta:.4f}\n")
# Evaluate and save all metrics
def save_model_metrics(model, test_ds, results_dir, class_names):
    """Evaluate ``model`` on ``test_ds`` and write all test-time reports.

    Always writes ``testing_metrics.txt`` and the confusion matrix. The ROC
    curve and the binary classification metrics are written only when there
    are exactly two classes, because both are defined for binary problems only.

    Args:
        model: Compiled model whose ``evaluate`` returns ``(loss, accuracy)``
            and whose ``predict`` returns one probability per class.
        test_ds: Dataset yielding ``(images, one_hot_labels)`` batches. It is
            iterated once for the labels and once more for the predictions, so
            it must yield batches in the same order on every pass.
        results_dir: Directory the reports are written to; it is not created here.
        class_names: Class names in index order.
    """
    # Evaluate the model
    print("Evaluating the model on test data...")
    test_loss, test_accuracy = model.evaluate(test_ds)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Save test metrics to a text file
    with open(os.path.join(results_dir, "testing_metrics.txt"), "w") as f:
        f.write(f"Test Loss: {test_loss:.4f}\n")
        f.write(f"Test Accuracy: {test_accuracy:.4f}\n")

    # Generate predictions
    y_true = np.concatenate([y for _, y in test_ds], axis=0)
    y_probs = model.predict(test_ds)
    y_pred = np.argmax(y_probs, axis=1)
    y_true = np.argmax(y_true, axis=1)  # Assuming one-hot encoded labels

    # Save confusion matrix
    save_confusion_matrix(y_true, y_pred, class_names, results_dir)

    if len(class_names) == 2:
        # Column 1 holds the probability of the positive class (index 1).
        positive_probs = y_probs[:, 1]
        save_roc_curve(y_true, positive_probs, results_dir)
        save_classification_metrics(y_true, y_pred, positive_probs, results_dir, class_names)
    else:
        # The binary-only reports would raise on any other class count.
        print("Skipping ROC curve and binary classification metrics: not exactly two classes.")

In [None]:
# No random seed is set on purpose: ANOVA requires the repeated runs to be unseeded.

def get_images_and_classes():
    """Load the train, dev and test image datasets found under ``baseDir``.

    Labels are inferred from the directory layout and requested with
    ``label_mode="categorical"``. Images are resized to 224x224 and batched
    with the module-level ``batch_size``. The train and dev splits are
    shuffled; the test split is not. The shapes of the first training batch
    are printed.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds, class_names)``, where
        ``class_names`` is taken from the training dataset.
    """
    def _load_split(subdir, shuffle):
        # All three splits share every setting except the folder and shuffling.
        return tf.keras.utils.image_dataset_from_directory(
            baseDir + subdir,
            labels="inferred",
            label_mode="categorical",
            image_size=(224, 224),
            batch_size=batch_size,
            shuffle=shuffle,
        )

    train_ds = _load_split("train", shuffle=True)
    val_ds = _load_split("dev", shuffle=True)
    # The test split keeps a fixed order.
    test_ds = _load_split("test", shuffle=False)

    class_names = train_ds.class_names

    # Print the shapes of one training batch.
    for first_images, first_labels in train_ds.take(1):
        print(f"Images shape: {first_images.shape}")
        print(f"Labels shape: {first_labels.shape}")

    return (train_ds, val_ds, test_ds, class_names)

### Dataset augmentation and normalization

In [None]:
# Rescales pixel values by a factor of 1/255.
normalization_layer = tf.keras.layers.Rescaling(scale=1.0 / 255)

# Light augmentation pipeline: random flips, contrast, zoom and rotation.
data_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.RandomFlip(mode="horizontal_and_vertical"),
        tf.keras.layers.RandomContrast(factor=0.2),
        tf.keras.layers.RandomZoom(height_factor=0.1),
        tf.keras.layers.RandomRotation(factor=0.1),
    ]
)

def augment_and_normalize(_train_ds, _val_ds, _test_ds):
    """Apply the preprocessing layers to each split and enable prefetching.

    The training split goes through ``data_augmentation`` (in training mode)
    and then ``normalization_layer``. The validation and test splits are only
    normalized. Labels pass through unchanged.

    Args:
        _train_ds: Training dataset of ``(images, labels)`` batches.
        _val_ds: Validation dataset of ``(images, labels)`` batches.
        _test_ds: Test dataset of ``(images, labels)`` batches.

    Returns:
        Tuple ``(train, val, test)`` of the mapped, prefetching datasets.
    """
    def _augment_then_rescale(images, labels):
        return (normalization_layer(data_augmentation(images, training=True)), labels)

    def _rescale_only(images, labels):
        return (normalization_layer(images), labels)

    autotune = tf.data.AUTOTUNE

    return (
        _train_ds.map(_augment_then_rescale).prefetch(buffer_size=autotune),
        _val_ds.map(_rescale_only).prefetch(buffer_size=autotune),
        _test_ds.map(_rescale_only).prefetch(buffer_size=autotune),
    )

### Load ResNet101V2 architecture

In [None]:
def create_and_compile_model(num_classes=2):
    """Build and compile a ResNet101V2-based softmax classifier for 224x224 RGB images.

    The model is ``Input -> ResNet101V2 (no top) -> Flatten -> Dense(softmax)``,
    compiled with Adam (using the module-level ``learning_rate``), categorical
    cross-entropy and the accuracy metric.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    input_shape = (224, 224, 3)

    # NOTE(review): `weights` is left at the Keras default, and this notebook
    # rescales inputs by 1/255 upstream. Confirm that matches the preprocessing
    # the backbone weights expect.
    backbone = tf.keras.applications.ResNet101V2(input_shape=input_shape, include_top=False)

    model = Sequential([
        tf.keras.Input(shape=input_shape),  # Explicit input layer
        backbone,
        Flatten(),
        Dense(num_classes, activation='softmax')  # Final classification layer
    ])

    # Call model to initialize input shape
    model.build(input_shape=(None,) + input_shape)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',  # Pairs with the softmax output and label_mode="categorical"
                  metrics=['accuracy'])

    return model

### Train model

In [None]:
import os

# Number of independent retrains collected for the ANOVA comparison.
ANOVA_REPEATS = 5
# Training epochs per retrain.
epochs = 35

# NOTE(review): every round builds a new model inside the same kernel. If memory
# grows across rounds, consider tf.keras.backend.clear_session() between rounds;
# confirm it is compatible with the module-level preprocessing layers.
for index in range(1, ANOVA_REPEATS + 1):
    print("REPEAT ROUND: ", index)

    path = "results/" + group_type + "/" + model_name +"/"
    name = group_type.upper() + "_" + model_name +"_Round" + str(index) # Rounds are retrains for ANOVA

    # Create the results directory before training, so a bad or unwritable path
    # fails immediately rather than after the whole training run.
    results_dir = path + name + "/"
    os.makedirs(results_dir, exist_ok=True)

    # Reload the data and rebuild the model from scratch on every round.
    (train_ds, val_ds, test_ds, class_names) = get_images_and_classes()
    (output_train_ds, output_val_ds, output_test_ds) = augment_and_normalize(train_ds, val_ds, test_ds)
    model = create_and_compile_model()

    history = model.fit(output_train_ds, epochs=epochs, validation_data=output_val_ds)

    # Save the trained model first, then the training curves and test reports.
    model.save(results_dir + name + ".keras")

    save_training_metrics(history, results_dir)
    save_model_metrics(model, output_test_ds, results_dir, class_names=class_names)

[1m 8/21[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m2:28[0m 11s/step - accuracy: 0.5957 - loss: 0.8699