In [None]:
# train.ipynb (converted to .py for preview; it can also be run directly in Jupyter Notebook)

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc
from sklearn.preprocessing import label_binarize
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, VGG19, EfficientNetB0, MobileNetV2
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_pre
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_pre
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mob_pre
from tensorflow.keras import layers, models, optimizers

# Set paths
DATASET_DIR = 'sdc'
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')
TEST_DIR = os.path.join(DATASET_DIR, 'test')

# Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 8
# Only sub-directories are classes. Stray files in TRAIN_DIR (e.g. .DS_Store,
# Thumbs.db) must not be counted, otherwise NUM_CLASSES would be larger than
# the number of classes flow_from_directory infers from the same folder and
# the softmax head would not match the label width.
CLASSES = sorted(
    entry
    for entry in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, entry))
)
NUM_CLASSES = len(CLASSES)

# Load data

def get_data(preprocess_func):
    """Create the training, validation and test directory iterators.

    Args:
        preprocess_func: Callable handed to ``ImageDataGenerator`` as its
            ``preprocessing_function``.

    Returns:
        A ``(train_gen, val_gen, test_gen)`` tuple. The first two are the
        ``'training'`` and ``'validation'`` subsets of ``TRAIN_DIR`` under a
        0.2 validation split; the third reads ``TEST_DIR``. Only the
        training iterator shuffles.
    """
    # Options shared by all three iterators.
    shared = dict(
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
    )

    split_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_func,
        validation_split=0.2,
    )
    train_gen = split_datagen.flow_from_directory(
        TRAIN_DIR, shuffle=True, subset='training', **shared
    )
    val_gen = split_datagen.flow_from_directory(
        TRAIN_DIR, shuffle=False, subset='validation', **shared
    )

    # The test set is not split, so it gets its own generator.
    plain_datagen = ImageDataGenerator(preprocessing_function=preprocess_func)
    test_gen = plain_datagen.flow_from_directory(
        TEST_DIR, shuffle=False, **shared
    )

    return train_gen, val_gen, test_gen

# Build model

def build_model(base_model):
    """Stack a small classification head on ``base_model`` and compile it.

    Args:
        base_model: Model or layer used as the first stage of the network.

    Returns:
        A compiled ``Sequential`` model ending in a ``NUM_CLASSES``-way
        softmax, using the Adam optimizer, categorical cross-entropy loss
        and the accuracy metric.
    """
    pooling = layers.GlobalAveragePooling2D()
    hidden = layers.Dense(128, activation='relu')
    dropout = layers.Dropout(0.3)
    output = layers.Dense(NUM_CLASSES, activation='softmax')

    classifier = models.Sequential([base_model, pooling, hidden, dropout, output])
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

# Plot training history

def plot_history(history, model_name):
    """Show training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` mapping holds the ``'accuracy'``,
            ``'val_accuracy'``, ``'loss'`` and ``'val_loss'`` series.
        model_name: Prefix used in both subplot titles.
    """
    curves = history.history
    # One entry per subplot: (train key, train label, val key, val label, title).
    panels = (
        ('accuracy', 'Train Accuracy', 'val_accuracy', 'Val Accuracy',
         f'{model_name} - Accuracy'),
        ('loss', 'Train Loss', 'val_loss', 'Val Loss',
         f'{model_name} - Loss'),
    )

    plt.figure(figsize=(12, 4))
    for slot, (train_key, train_label, val_key, val_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot ROC curves for all classes

def plot_all_roc_curves(y_true_bin, y_pred_probs, model_name):
    """Draw one ROC curve per class on a single figure.

    Args:
        y_true_bin: 2-D indicator array; column ``i`` marks membership of
            class ``i``.
        y_pred_probs: 2-D score array indexed the same way as ``y_true_bin``.
        model_name: Prefix used in the figure title.
    """
    # Compute every curve first; the figure is only opened afterwards.
    curves = []
    for idx in range(NUM_CLASSES):
        false_pos, true_pos, _ = roc_curve(y_true_bin[:, idx], y_pred_probs[:, idx])
        curves.append((false_pos, true_pos, auc(false_pos, true_pos)))

    plt.figure(figsize=(15, 10))
    for class_name, (false_pos, true_pos, area) in zip(CLASSES, curves):
        plt.plot(false_pos, true_pos, label=f'Class {class_name} (AUC = {area:.2f})')
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.title(f'{model_name} - ROC Curves (All Classes)')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right', fontsize='small')
    plt.grid()
    plt.show()

# Train & evaluate model

def train_and_evaluate(name, base_model_func, preprocess_func, epochs=5):
    """Train a frozen-backbone classifier and print/plot its evaluation.

    The backbone is created with ImageNet weights and ``include_top=False``,
    frozen, wrapped by ``build_model`` and fitted on the iterators returned
    by ``get_data``. Afterwards the function prints train/validation/test
    loss and accuracy, a classification report and the macro ROC AUC, and
    shows the history curves, the confusion matrix and per-class ROC curves.

    Args:
        name: Display name used in log lines and plot titles.
        base_model_func: Backbone constructor accepting ``include_top``,
            ``input_shape`` and ``weights`` keyword arguments.
        preprocess_func: Preprocessing callable forwarded to ``get_data``.
        epochs: Number of training epochs (defaults to 5).

    Returns:
        None. All results are printed or plotted.
    """
    print(f"\n--- Training {name} ---")

    # This function is called once per architecture; release the Keras
    # global state left behind by the previous model so memory does not
    # accumulate from one run to the next.
    tf.keras.backend.clear_session()

    base_model = base_model_func(include_top=False, input_shape=IMG_SIZE + (3,), weights='imagenet')
    base_model.trainable = False  # only the new head is trained

    model = build_model(base_model)
    train_gen, val_gen, test_gen = get_data(preprocess_func)

    history = model.fit(train_gen, validation_data=val_gen, epochs=epochs, verbose=1)

    plot_history(history, name)

    print("\nEvaluation:")
    train_loss, train_acc = model.evaluate(train_gen, verbose=0)
    val_loss, val_acc = model.evaluate(val_gen, verbose=0)
    test_loss, test_acc = model.evaluate(test_gen, verbose=0)

    print(f"Train Accuracy: {train_acc:.4f} | Train Loss: {train_loss:.4f}")
    print(f"Validation Accuracy: {val_acc:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f} | Test Loss: {test_loss:.4f}")

    # The test iterator is created with shuffle=False, so prediction rows
    # are in the same order as test_gen.classes.
    y_true = test_gen.classes
    y_pred_probs = model.predict(test_gen)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Pin the label set explicitly. Without it scikit-learn derives the
    # labels from the values present in y_true/y_pred, so a class that is
    # never seen nor predicted would make the report reject target_names
    # and shrink the confusion matrix below the number of tick labels.
    class_names = list(test_gen.class_indices)
    label_ids = list(range(NUM_CLASSES))

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    print("\nConfusion Matrix:")
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title(f'{name} - Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    y_true_bin = label_binarize(y_true, classes=label_ids)
    if y_true_bin.shape[1] == 1:
        # For a two-class problem label_binarize returns a single column;
        # expand it to one column per class so it lines up with the
        # two-column softmax output used below.
        y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

    auc_score = roc_auc_score(y_true_bin, y_pred_probs, average='macro', multi_class='ovr')
    print(f"\nROC AUC Score: {auc_score:.4f}")

    plot_all_roc_curves(y_true_bin, y_pred_probs, name)

# Run all models
# Each entry is (display name, backbone constructor, preprocessing function);
# VGG19 is paired with the vgg16 preprocess_input imported as vgg_pre.
MODELS = [
    ("VGG16", VGG16, vgg_pre),
    ("VGG19", VGG19, vgg_pre),
    ("EfficientNetB0", EfficientNetB0, eff_pre),
    ("MobileNetV2", MobileNetV2, mob_pre),
]

# Train and evaluate the architectures one after another, in list order.
for model_spec in MODELS:
    train_and_evaluate(*model_spec)



--- Training VGG16 ---
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
58889256/58889256 ━━━━━━━━━━━━━━━━━━━━ 82s 1us/step
Found 25760 images belonging to 23 classes.
Found 6440 images belonging to 23 classes.
Found 4002 images belonging to 23 classes.


  self._warn_if_super_not_called()


Epoch 1/5
 680/3220 ━━━━━━━━━━━━━━━━━━━━ 1:24:35 2s/step - accuracy: 0.0952 - loss: 3.8698