In [2]:
# Image Classification using DenseNet121 Transfer Learning Model
# with comprehensive metrics evaluation - For folder structure dataset

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix, matthews_corrcoef,
    cohen_kappa_score, classification_report, fbeta_score
)
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Set random seed for reproducibility
import tensorflow as tf
import random as python_random
np.random.seed(42)
python_random.seed(42)
tf.random.set_seed(42)

# Configuration parameters
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 0.0001

# Define metrics calculation function
def calculate_metrics(y_true, y_pred, y_pred_prob=None):
    """
    Calculate various classification metrics
    """
    # Basic metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')

    # F-scores
    f1 = f1_score(y_true, y_pred, average='weighted')

    # F2 score (beta=2): emphasizes recall over precision
    f2 = fbeta_score(y_true, y_pred, beta=2, average='weighted')

    # F0.5 score (beta=0.5): emphasizes precision over recall
    f0_5 = fbeta_score(y_true, y_pred, beta=0.5, average='weighted')

    # Matthews Correlation Coefficient
    mcc = matthews_corrcoef(y_true, y_pred)

    # Cohen's Kappa
    kappa = cohen_kappa_score(y_true, y_pred)

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred)

    # Return all metrics as a dictionary
    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'f2': f2,
        'f0.5': f0_5,
        'mcc': mcc,
        'kappa': kappa,
        'confusion_matrix': cm
    }

    return metrics

# Function to build the DenseNet121 model
def build_model(num_classes):
    # Load the DenseNet121 model with pre-trained weights
    base_model = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
    )

    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    # Add custom classification layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.3)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the model
    model = Model(inputs=base_model.input, outputs=predictions)

    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Function to train the model
def train_model(model, train_generator, validation_generator, epochs=EPOCHS):
    # Callbacks
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    model_checkpoint = ModelCheckpoint(
        'best_densenet121_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    # Train the model
    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // BATCH_SIZE,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_generator.samples // BATCH_SIZE,
        callbacks=[early_stopping, model_checkpoint]
    )

    return history, model

# Function to evaluate the model and compute all metrics
def evaluate_model(model, test_generator):
    # Get predictions
    test_generator.reset()
    y_pred_prob = model.predict(test_generator, steps=np.ceil(test_generator.samples / BATCH_SIZE))
    y_pred = np.argmax(y_pred_prob, axis=1)

    # Get true labels
    y_true = test_generator.classes

    # Calculate metrics
    metrics = calculate_metrics(y_true, y_pred, y_pred_prob)

    # Print detailed metrics
    print("\n===== Model Evaluation Metrics =====")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"F2 Score: {metrics['f2']:.4f}")
    print(f"F0.5 Score: {metrics['f0.5']:.4f}")
    print(f"Matthews Correlation Coefficient: {metrics['mcc']:.4f}")
    print(f"Cohen's Kappa: {metrics['kappa']:.4f}")

    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        metrics['confusion_matrix'],
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=test_generator.class_indices.keys(),
        yticklabels=test_generator.class_indices.keys()
    )
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.show()

    # Plot metrics in a bar chart
    metrics_to_plot = {
        'Accuracy': metrics['accuracy'],
        'Precision': metrics['precision'],
        'Recall': metrics['recall'],
        'F1': metrics['f1'],
        'F2': metrics['f2'],
        'F0.5': metrics['f0.5'],
        'MCC': metrics['mcc'],
        'Kappa': metrics['kappa']
    }

    plt.figure(figsize=(12, 6))
    sns.barplot(x=list(metrics_to_plot.keys()), y=list(metrics_to_plot.values()))
    plt.ylim(0, 1)
    plt.title('Performance Metrics')
    plt.ylabel('Score')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('performance_metrics.png')
    plt.show()

    # Create a detailed classification report
    class_names = list(test_generator.class_indices.keys())
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    print("\nClassification Report:")
    print(report_df)
    report_df.to_csv('classification_report.csv')

    return metrics

# Function to plot training history
def plot_training_history(history):
    plt.figure(figsize=(12, 5))

    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train')
    plt.plot(history.history['val_accuracy'], label='Validation')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

def setup_data_generators(data_folder):
    """
    Set up data generators based on the folder structure
    """
    # Check if the dataset has train/test split already
    has_train_test_split = os.path.exists(os.path.join(data_folder, 'train')) and os.path.exists(os.path.join(data_folder, 'test'))

    # Data generators
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        shear_range=0.2,
        validation_split=0.2  # 20% for validation
    )

    test_datagen = ImageDataGenerator(rescale=1./255)

    if has_train_test_split:
        # If dataset already has train/test folders
        train_generator = train_datagen.flow_from_directory(
            os.path.join(data_folder, 'train'),
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical',
            shuffle=True,
            subset='training'
        )

        validation_generator = train_datagen.flow_from_directory(
            os.path.join(data_folder, 'train'),
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical',
            shuffle=False,
            subset='validation'
        )

        test_generator = test_datagen.flow_from_directory(
            os.path.join(data_folder, 'test'),
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical',
            shuffle=False
        )
    else:
        # If dataset is just a folder with class subfolders
        print("No train/test split found. Using validation_split for evaluation.")
        train_generator = train_datagen.flow_from_directory(
            data_folder,
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical',
            shuffle=True,
            subset='training'
        )

        validation_generator = train_datagen.flow_from_directory(
            data_folder,
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical',
            shuffle=False,
            subset='validation'
        )

        # Use validation set as test set since we don't have a separate test set
        test_generator = validation_generator

    return train_generator, validation_generator, test_generator

# Main execution function
def run_classification(data_folder):
    print("Starting image classification with DenseNet121...")
    print(f"Using data folder: {data_folder}")

    # Check if the dataset exists
    if not os.path.exists(data_folder):
        print(f"Error: Data folder '{data_folder}' does not exist.")
        return None, None

    # Set up data generators based on folder structure
    train_generator, validation_generator, test_generator = setup_data_generators(data_folder)

    # Print class mapping
    class_indices = train_generator.class_indices
    num_classes = len(class_indices)
    print(f"Found {num_classes} classes: {list(class_indices.keys())}")
    print(f"Class mapping: {class_indices}")
    print(f"Total training samples: {train_generator.samples}")
    print(f"Total validation samples: {validation_generator.samples}")
    print(f"Total test samples: {test_generator.samples}")

    # Build the model
    model = build_model(num_classes)
    print(model.summary())

    # Train the model
    print("\nTraining the model...")
    history, model = train_model(model, train_generator, validation_generator)

    # Plot training history
    plot_training_history(history)

    # Evaluate the model
    print("\nEvaluating the model...")
    metrics = evaluate_model(model, test_generator)

    # Save metrics to CSV
    metrics_df = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1', 'F2', 'F0.5', 'MCC', 'Kappa'],
        'Value': [
            metrics['accuracy'], metrics['precision'], metrics['recall'],
            metrics['f1'], metrics['f2'], metrics['f0.5'],
            metrics['mcc'], metrics['kappa']
        ]
    })
    metrics_df.to_csv('metrics_results.csv', index=False)
    print(f"Metrics saved to 'metrics_results.csv'")

    # Fine-tuning: unfreeze some layers and continue training with lower learning rate
    print("\nFine-tuning the model...")

    # Unfreeze the last 30 layers of the base model
    for layer in model.layers[0].layers[-30:]:
        layer.trainable = True

    # Recompile the model with a lower learning rate
    model.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE/10),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Train for a few more epochs
    fine_tune_history, model = train_model(
        model,
        train_generator,
        validation_generator,
        epochs=10
    )

    # Plot fine-tuning history
    plot_training_history(fine_tune_history)

    # Final evaluation after fine-tuning
    print("\nFinal evaluation after fine-tuning...")
    final_metrics = evaluate_model(model, test_generator)

    # Save final metrics to CSV
    final_metrics_df = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1', 'F2', 'F0.5', 'MCC', 'Kappa'],
        'Value': [
            final_metrics['accuracy'], final_metrics['precision'], final_metrics['recall'],
            final_metrics['f1'], final_metrics['f2'], final_metrics['f0.5'],
            final_metrics['mcc'], final_metrics['kappa']
        ]
    })
    final_metrics_df.to_csv('final_metrics_results.csv', index=False)
    print(f"Final metrics saved to 'final_metrics_results.csv'")

    print("\nImage classification complete!")
    return model, final_metrics

# Example usage - run in Google Colab
if __name__ == "__main__":
    # Install required packages if they're not already installed
    try:
        import tensorflow as tf
        import sklearn
    except ImportError:
        print("Installing required packages...")
        !pip install tensorflow scikit-learn pandas seaborn matplotlib

    # For Colab, you might want to mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # ======= IMPORTANT: SPECIFY YOUR DATA FOLDER PATH HERE =======
    # You can use a path on Google Drive
    DATA_FOLDER = "/content/drive/MyDrive/train"  # Change this to your dataset folder

    # Or use a local path if you've uploaded directly to Colab
    # DATA_FOLDER = "/content/dataset_folder"

    # ======= Alternative: uncomment below to use file browser to select the folder =======
    """
    from google.colab import files

    def select_folder():
        print("Please select your dataset folder:")
        uploaded = files.upload()
        if not uploaded:
            return None
        return list(uploaded.keys())[0]

    selected_folder = select_folder()
    if selected_folder:
        DATA_FOLDER = os.path.join("/content", selected_folder)
    """

    # Run the classification pipeline
    model, metrics = run_classification(DATA_FOLDER)

Mounted at /content/drive
Starting image classification with DenseNet121...
Using data folder: /content/drive/MyDrive/train
No train/test split found. Using validation_split for evaluation.
Found 8192 images belonging to 4 classes.
Found 2048 images belonging to 4 classes.
Found 4 classes: ['Mild Impairment', 'Moderate Impairment', 'No Impairment', 'Very Mild Impairment']
Class mapping: {'Mild Impairment': 0, 'Moderate Impairment': 1, 'No Impairment': 2, 'Very Mild Impairment': 3}
Total training samples: 8192
Total validation samples: 2048
Total test samples: 2048
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


None

Training the model...


  self._warn_if_super_not_called()


Epoch 1/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8s/step - accuracy: 0.3507 - loss: 1.5216
Epoch 1: val_accuracy improved from -inf to 0.57422, saving model to best_densenet121_model.h5




[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2673s[0m 10s/step - accuracy: 0.3509 - loss: 1.5207 - val_accuracy: 0.5742 - val_loss: 0.9739
Epoch 2/20


KeyboardInterrupt: 

In [None]:
from google.colab import drive
drive.mount('/content/drive')