In [1]:
import os
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras import Input, Model

In [2]:
!git clone https://github.com/WangZixuan-nus/deep-learning.git


# Define the data directories
# TRAIN_DIR / TEST_DIR point at the original images inside the repository
# cloned above. NOTE(review): the absolute /content prefix presumably assumes
# the clone ran with /content as the working directory (e.g. Colab) - confirm.
TRAIN_DIR = "/content/deep-learning/projects/fruit-recognition/fruitdata/train"
TEST_DIR = "/content/deep-learning/projects/fruit-recognition/fruitdata/test"
# Destinations that augment_data() writes to and the training/evaluation
# functions later load their datasets from.
AUGMENTED_TRAIN_DIR = "/content/augmented_data/train"
AUGMENTED_TEST_DIR = "/content/augmented_data/test"
# File extension appended to the saved-model paths under /content/models.
MODEL_FORMAT = "keras"

def setup_directories():
    """
    Creates necessary directories for saving results and models

    The augmented-data directories are taken from AUGMENTED_TRAIN_DIR and
    AUGMENTED_TEST_DIR rather than repeated as literals, so the folders created
    here always match the ones the rest of the notebook reads and writes.
    Directories that already exist are left untouched.
    """
    output_dirs = (
        AUGMENTED_TRAIN_DIR,
        AUGMENTED_TEST_DIR,
        "/content/models",   # saved models
        "/content/results",  # training curves, heatmaps, comparison plot
    )
    for directory in output_dirs:
        os.makedirs(directory, exist_ok=True)

Cloning into 'deep-learning'...
remote: Enumerating objects: 2829, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 2829 (delta 6), reused 10 (delta 2), pack-reused 2807 (from 2)[K
Receiving objects: 100% (2829/2829), 106.98 MiB | 27.63 MiB/s, done.
Resolving deltas: 100% (36/36), done.


In [3]:

def augment_data(source_dir, target_dir, augmentation_factor=3):
    """
    Augments images in source_dir and saves them to target_dir

    Every immediate sub-directory of source_dir is treated as one class. For
    each image file inside it (extension .jpg/.jpeg/.png, matched without
    regard to case) the original is written to the matching class folder under
    target_dir, followed by `augmentation_factor` randomly transformed variants
    saved as JPEG with the prefix "<original stem>_aug".

    Parameters:
    - source_dir: Directory containing one sub-directory of images per class
    - target_dir: Directory where originals and augmented images will be saved
    - augmentation_factor: Number of augmented images to generate per original
      image; 0 or a negative value writes the originals only

    NOTE(review): the file names of augmented images are chosen by Keras;
    calling this again on the same target_dir presumably adds further files
    instead of replacing earlier ones - confirm before re-running.
    """
    print(f"Augmenting data from {source_dir} to {target_dir}...")

    # Define the image data generator with various transformations
    datagen = ImageDataGenerator(
        rotation_range=40,        # Random rotation in the range of 40 degrees
        width_shift_range=0.2,    # Random horizontal shift up to 20%
        height_shift_range=0.2,   # Random vertical shift up to 20%
        shear_range=0.2,          # Random shearing transformations
        zoom_range=0.2,           # Random zoom up to 20%
        horizontal_flip=True,     # Randomly flip images horizontally
        fill_mode='nearest'       # Fill in newly created pixels using nearest neighbor approach
    )

    image_extensions = ('.jpg', '.jpeg', '.png')

    # Process each class directory
    for class_name in os.listdir(source_dir):
        class_source_dir = os.path.join(source_dir, class_name)

        # Skip non-directory files
        if not os.path.isdir(class_source_dir):
            continue

        # Create class directory in target_dir if it doesn't exist
        class_target_dir = os.path.join(target_dir, class_name)
        os.makedirs(class_target_dir, exist_ok=True)

        for img_name in os.listdir(class_source_dir):
            # Compare in lower case so files such as "IMG_01.JPG" are not
            # silently dropped from the dataset.
            if not img_name.lower().endswith(image_extensions):
                continue

            img_path = os.path.join(class_source_dir, img_name)
            # Load the image and reshape to (1, height, width, channels),
            # the batch layout datagen.flow() expects.
            x = img_to_array(load_img(img_path))
            x = x.reshape((1,) + x.shape)

            # Write the original image. scale=False keeps the pixel values as
            # loaded; the default (scale=True) min-max stretches them to
            # [0, 255], so the "copy" would no longer match the source.
            tf.keras.utils.save_img(
                os.path.join(class_target_dir, img_name),
                x[0],
                scale=False
            )

            # The generator saves a file as soon as a batch is drawn, so a
            # non-positive factor must skip the loop entirely rather than be
            # checked after the first batch.
            if augmentation_factor <= 0:
                continue

            # Generate augmented images: with batch_size=1 each batch drawn
            # from the (endless) generator corresponds to one saved image.
            generated = 0
            for _batch in datagen.flow(
                x,
                batch_size=1,
                save_to_dir=class_target_dir,
                save_prefix=f"{os.path.splitext(img_name)[0]}_aug",
                save_format='jpg'
            ):
                generated += 1
                if generated >= augmentation_factor:
                    break

    print(f"Data augmentation completed. Augmented images saved to {target_dir}")


In [4]:
def data_load(data_dir, test_data_dir, img_height, img_width, batch_size):
    """
    Loads the training and testing datasets

    Both directories are read with one sub-directory per class and one-hot
    ('categorical') labels. The two datasets must expose the same class list
    in the same order, otherwise a label index would mean different classes in
    training and evaluation.

    Parameters:
    - data_dir: Directory containing training images
    - test_data_dir: Directory containing testing images
    - img_height: Target image height
    - img_width: Target image width
    - batch_size: Batch size for training/testing

    Returns:
    - train_ds: Training dataset (cached, reshuffled every epoch, prefetched)
    - val_ds: Validation/testing dataset (cached, prefetched)
    - class_names: List of class names

    Raises:
    - ValueError: if the class folders of the two directories differ
    """
    image_size = (img_height, img_width)

    # Load training dataset
    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        label_mode='categorical',
        seed=123,
        image_size=image_size,
        batch_size=batch_size
    )

    # Load testing dataset
    val_ds = tf.keras.utils.image_dataset_from_directory(
        test_data_dir,
        label_mode='categorical',
        seed=123,
        image_size=image_size,
        batch_size=batch_size
    )

    # Get class names from the training dataset and make sure the test split
    # uses the identical label encoding.
    class_names = train_ds.class_names
    if val_ds.class_names != class_names:
        raise ValueError(
            "Class folders differ between training and testing data: "
            f"{class_names} vs {val_ds.class_names}"
        )

    # Use caching and prefetching for better performance
    AUTOTUNE = tf.data.AUTOTUNE
    # cache() replays whatever order the first pass produced, so the shuffle
    # performed by the loader would be frozen after epoch one. Shuffling again
    # after the cache (buffer counted in batches) restores a fresh batch order
    # every epoch; the fixed seed keeps runs reproducible.
    train_ds = (
        train_ds.cache()
        .shuffle(buffer_size=256, seed=123)
        .prefetch(buffer_size=AUTOTUNE)
    )
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

    return train_ds, val_ds, class_names


In [5]:
def build_cnn_model(img_shape=(224, 224, 3), class_num=15):
    """
    Assemble and compile a small convolutional classifier.

    The network rescales pixels to [0, 1], applies three Conv2D(3x3, relu) +
    MaxPooling2D blocks with 16, 32 and 64 filters, flattens, passes through a
    128-unit relu layer and ends in a softmax over `class_num` classes. The
    architecture summary is printed before compilation.

    Parameters:
    - img_shape: Shape of input images (height, width, channels)
    - class_num: Number of classes to predict

    Returns:
    - model: Model compiled with SGD (learning rate 0.01), categorical
      crossentropy loss and the accuracy metric
    """
    layers = tf.keras.layers

    inputs = Input(shape=img_shape)

    # Bring pixel values into [0, 1] before the first convolution.
    features = layers.Rescaling(1./255)(inputs)

    # Three identical conv/pool stages that differ only in filter count.
    for filters in (16, 32, 64):
        features = layers.Conv2D(filters, (3, 3), activation='relu')(features)
        features = layers.MaxPooling2D(2, 2)(features)

    # Classification head: flatten -> dense(128) -> softmax.
    features = layers.Flatten()(features)
    features = layers.Dense(128, activation='relu')(features)
    outputs = layers.Dense(class_num, activation='softmax')(features)

    model = Model(inputs, outputs)
    model.summary()

    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [6]:
def build_mobilenet_model(img_shape=(224, 224, 3), class_num=15):
    """
    Assemble and compile a transfer-learning classifier on top of MobileNetV2.

    Inputs are rescaled to [-1, 1], passed through an ImageNet-initialised
    MobileNetV2 backbone (classifier head removed, weights frozen), averaged
    over the spatial dimensions and fed to a softmax layer with `class_num`
    outputs. The architecture summary is printed before compilation.

    Parameters:
    - img_shape: Shape of input images (height, width, channels)
    - class_num: Number of classes to predict

    Returns:
    - model: Model compiled with Adam, categorical crossentropy loss and the
      accuracy metric
    """
    layers = tf.keras.layers

    inputs = Input(shape=img_shape)

    # x / 127.5 - 1 maps [0, 255] onto [-1, 1].
    scaled = layers.Rescaling(1./127.5, offset=-1)(inputs)

    # Pre-trained feature extractor without its ImageNet classification head.
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=img_shape,
        include_top=False,
        weights='imagenet'
    )
    # Only the new head is trained; the backbone weights stay fixed.
    backbone.trainable = False

    feature_maps = backbone(scaled)
    pooled = layers.GlobalAveragePooling2D()(feature_maps)
    outputs = layers.Dense(class_num, activation='softmax')(pooled)

    model = Model(inputs, outputs)
    model.summary()

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [7]:
def show_accuracy_and_loss(history, model_name="model"):
    """
    Save a two-panel figure with the accuracy and loss curves of a training run.

    The upper panel shows training vs. validation accuracy, the lower panel
    training vs. validation loss. The figure is written to
    /content/results/results_<model_name>.png and then closed.

    Parameters:
    - history: History object returned by model.fit(); its `history` dict must
      contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
    - model_name: Name of the model, used in the output file name
    """
    recorded = history.history
    # Read all four series up front so a missing key fails before any figure
    # is created.
    train_acc, val_acc, train_loss, val_loss = (
        recorded[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    )

    fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(10, 8))

    # Upper panel: accuracy, with the y-axis capped at 1.
    acc_ax.plot(train_acc, label='Training Accuracy')
    acc_ax.plot(val_acc, label='Validation Accuracy')
    acc_ax.legend(loc='lower right')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
    acc_ax.set_title('Training and Validation Accuracy')

    # Lower panel: loss.
    loss_ax.plot(train_loss, label='Training Loss')
    loss_ax.plot(val_loss, label='Validation Loss')
    loss_ax.legend(loc='upper right')
    loss_ax.set_ylabel('Cross Entropy')
    loss_ax.set_title('Training and Validation Loss')
    loss_ax.set_xlabel('Epoch')

    fig.tight_layout()
    filename = f"/content/results/results_{model_name}.png"
    fig.savefig(filename, dpi=100)
    print(f"Training results plot saved to {filename}")
    plt.close(fig)


In [8]:
def train_model(model_type="cnn", epochs=15):
    """
    Trains either a CNN or MobileNet model

    Loads the augmented train/test datasets, builds the requested model, fits
    it with the test split as validation data, saves it under /content/models
    and stores the accuracy/loss curves via show_accuracy_and_loss().

    Parameters:
    - model_type: Type of model to train ("cnn" or "mobilenet")
    - epochs: Number of training epochs

    Returns:
    - model: The trained model
    - class_names: List of class names taken from the training dataset

    Raises:
    - ValueError: if model_type is not one of the supported names
    """
    # Reject unknown names before any data is loaded; previously every value
    # other than "cnn" (e.g. "CNN" or a typo) silently trained MobileNet.
    supported_types = ("cnn", "mobilenet")
    if model_type not in supported_types:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    start_time = time.time()

    # Load data from augmented directories
    train_ds, val_ds, class_names = data_load(
        AUGMENTED_TRAIN_DIR,
        AUGMENTED_TEST_DIR,
        224, 224, 16
    )

    print(f"Training {model_type.upper()} model with {len(class_names)} classes: {class_names}")

    # Build the model based on type
    if model_type == "cnn":
        model = build_cnn_model(class_num=len(class_names))
    else:
        model = build_mobilenet_model(class_num=len(class_names))
    model_path = f"/content/models/{model_type}_model.{MODEL_FORMAT}"

    # Train the model
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )

    # Save the model using modern format
    model.save(model_path)
    print(f"Model saved to {model_path}")

    # Elapsed wall time for the whole call (data loading, model building,
    # fitting and saving), reported under the existing "Training" label.
    duration = time.time() - start_time
    print(f'Training completed in {duration:.2f} seconds')

    # Show and save training progress
    show_accuracy_and_loss(history, model_type)

    return model, class_names


In [9]:
def evaluate_model(model_path, model_name):
    """
    Evaluates a trained model and generates a confusion matrix heatmap

    Loads the augmented test dataset and the saved model, prints the test
    accuracy, then builds a confusion matrix normalised per true class and
    saves it as a heatmap to /content/results/heatmap_<model_name>.png.
    Rows for classes that have no test samples are shown as zeros.

    Parameters:
    - model_path: Path to the saved model
    - model_name: Name of the model for saving results
    """
    # Load dataset and model
    _, test_ds, class_names = data_load(
        AUGMENTED_TRAIN_DIR,
        AUGMENTED_TEST_DIR,
        224, 224, 16
    )
    model = tf.keras.models.load_model(model_path)

    # Evaluate model on test data - this will build the metrics
    _loss, accuracy = model.evaluate(test_ds, verbose=1)
    print(f'{model_name.upper()} Test Accuracy: {accuracy:.4f}')

    # Collect predictions and true labels batch by batch so both lists stay
    # aligned regardless of the order in which the dataset yields batches.
    true_labels = []
    predicted_labels = []
    for images, labels in test_ds:
        # predict_on_batch avoids the per-call setup cost of predict() when
        # the input is already a single batch.
        predictions = model.predict_on_batch(images)
        true_labels.extend(np.argmax(labels.numpy(), axis=1))
        predicted_labels.extend(np.argmax(predictions, axis=1))

    # Create confusion matrix: rows are true classes, columns predictions.
    num_classes = len(class_names)
    confusion_matrix = np.zeros((num_classes, num_classes))
    np.add.at(
        confusion_matrix,
        (np.asarray(true_labels, dtype=int), np.asarray(predicted_labels, dtype=int)),
        1
    )

    # Normalize by row (true labels). Classes without any test sample have a
    # row sum of 0; leave those rows at 0 instead of dividing by zero.
    row_sums = confusion_matrix.sum(axis=1, keepdims=True)
    normalized_matrix = np.divide(
        confusion_matrix,
        row_sums,
        out=np.zeros_like(confusion_matrix),
        where=row_sums > 0
    )

    # Plot heatmap
    fig, ax = plt.subplots(figsize=(12, 10))
    im = ax.imshow(normalized_matrix, cmap="OrRd")

    # Add labels
    tick_positions = np.arange(num_classes)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(class_names, rotation=45, ha="right", rotation_mode="anchor")
    ax.set_yticklabels(class_names)

    # Add text annotations (x is the predicted column, y the true row)
    for i in range(num_classes):
        for j in range(num_classes):
            ax.text(j, i, f"{normalized_matrix[i, j]:.2f}",
                    ha="center", va="center", color="black")

    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    ax.set_title(f"Confusion Matrix - {model_name.upper()}")

    # Add the colorbar before tightening the layout so it is accounted for.
    fig.colorbar(im, ax=ax)
    fig.tight_layout()

    # Save heatmap
    filename = f"/content/results/heatmap_{model_name}.png"
    fig.savefig(filename, dpi=100)
    print(f"Confusion matrix heatmap saved to {filename}")
    plt.close(fig)


In [10]:
def compare_models():
    """
    Evaluate the saved CNN and MobileNetV2 models side by side.

    Both models are loaded from /content/models, evaluated on the augmented
    test dataset, and their accuracies are drawn as a bar chart saved to
    /content/results/model_comparison.png. A textual summary naming the
    better model is printed afterwards.
    """
    models_dir = "/content/models"
    cnn_model_path = f"{models_dir}/cnn_model.{MODEL_FORMAT}"
    mobilenet_model_path = f"{models_dir}/mobilenet_model.{MODEL_FORMAT}"

    # Only the test split is needed here.
    _, test_ds, _ = data_load(AUGMENTED_TRAIN_DIR, AUGMENTED_TEST_DIR, 224, 224, 16)

    cnn_model = tf.keras.models.load_model(cnn_model_path)
    mobilenet_model = tf.keras.models.load_model(mobilenet_model_path)

    print("\nEvaluating CNN model...")
    _, cnn_accuracy = cnn_model.evaluate(test_ds, verbose=1)

    print("\nEvaluating MobileNetV2 model...")
    _, mobilenet_accuracy = mobilenet_model.evaluate(test_ds, verbose=1)

    # Bar chart with one bar per model and its accuracy printed above it.
    labels = ['CNN', 'MobileNetV2']
    scores = [cnn_accuracy, mobilenet_accuracy]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(labels, scores, color=['#3498db', '#2ecc71'])
    ax.set_ylim([0, 1])
    ax.set_xlabel('Model')
    ax.set_ylabel('Accuracy')
    ax.set_title('Model Accuracy Comparison')
    for position, score in enumerate(scores):
        ax.text(position, score + 0.01, f'{score:.4f}', ha='center')

    filename = "/content/results/model_comparison.png"
    fig.savefig(filename, dpi=100)
    print(f"Model comparison saved to {filename}")
    plt.close(fig)

    # Textual summary.
    print("\nModel Comparison Results:")
    print(f"CNN Accuracy: {cnn_accuracy:.4f}")
    print(f"MobileNetV2 Accuracy: {mobilenet_accuracy:.4f}")
    print(f"Difference: {abs(cnn_accuracy - mobilenet_accuracy):.4f}")

    if cnn_accuracy > mobilenet_accuracy:
        verdict = "CNN model performs better on this dataset."
    elif mobilenet_accuracy > cnn_accuracy:
        verdict = "MobileNetV2 model performs better on this dataset."
    else:
        verdict = "Both models perform equally on this dataset."
    print(verdict)


In [11]:
def main():
    """
    Run the whole pipeline: prepare folders, augment data, train both models,
    evaluate each one and finally compare them.
    """
    setup_directories()

    # Augment both splits (3 extra images per training image, 2 per test image).
    # NOTE(review): the test split is augmented as well, so the reported
    # accuracies are measured on transformed images - confirm this is intended.
    augmentation_jobs = (
        (TRAIN_DIR, AUGMENTED_TRAIN_DIR, 3),
        (TEST_DIR, AUGMENTED_TEST_DIR, 2),
    )
    for source_dir, target_dir, factor in augmentation_jobs:
        augment_data(source_dir, target_dir, augmentation_factor=factor)

    # Training; the returned (model, class_names) pairs are not needed here
    # because evaluation reloads the models from disk.
    print("\n===== Training CNN Model =====")
    train_model(model_type="cnn", epochs=15)

    print("\n===== Training MobileNet Model =====")
    train_model(model_type="mobilenet", epochs=15)

    # Per-model evaluation with confusion-matrix heatmaps.
    print("\n===== Evaluating CNN Model =====")
    evaluate_model(f"/content/models/cnn_model.{MODEL_FORMAT}", "cnn")

    print("\n===== Evaluating MobileNet Model =====")
    evaluate_model(f"/content/models/mobilenet_model.{MODEL_FORMAT}", "mobilenet")

    # Head-to-head accuracy comparison.
    print("\n===== Comparing Models =====")
    compare_models()


if __name__ == "__main__":
    main()

Augmenting data from /content/deep-learning/projects/fruit-recognition/fruitdata/train to /content/augmented_data/train...
Data augmentation completed. Augmented images saved to /content/augmented_data/train
Augmenting data from /content/deep-learning/projects/fruit-recognition/fruitdata/test to /content/augmented_data/test...
Data augmentation completed. Augmented images saved to /content/augmented_data/test

===== Training CNN Model =====
Found 8679 files belonging to 15 classes.
Found 1602 files belonging to 15 classes.
Training CNN model with 15 classes: ['Hami melon', 'bitter gourd', 'carrot', 'cherry', 'cucumber', 'dragon fruit', 'durian', 'kiwi fruit', 'lemon', 'litchi', 'longan', 'mango', 'pear', 'pineapple', 'strawberry']


Epoch 1/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m430s[0m 789ms/step - accuracy: 0.1248 - loss: 2.5743 - val_accuracy: 0.2459 - val_loss: 2.4285
Epoch 2/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m399s[0m 736ms/step - accuracy: 0.3476 - loss: 1.8920 - val_accuracy: 0.3564 - val_loss: 2.1111
Epoch 3/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m447s[0m 745ms/step - accuracy: 0.4651 - loss: 1.5668 - val_accuracy: 0.4151 - val_loss: 1.9610
Epoch 4/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 745ms/step - accuracy: 0.5424 - loss: 1.3623 - val_accuracy: 0.4276 - val_loss: 1.9350
Epoch 5/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m400s[0m 737ms/step - accuracy: 0.6091 - loss: 1.1702 - val_accuracy: 0.5019 - val_loss: 1.6786
Epoch 6/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m400s[0m 735ms/step - accuracy: 0.6743 - loss: 0.9858 - val_accuracy: 0.5287 - val_loss: 1.6411
Epoc

Epoch 1/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 728ms/step - accuracy: 0.7033 - loss: 1.0198 - val_accuracy: 0.8826 - val_loss: 0.3833
Epoch 2/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m387s[0m 712ms/step - accuracy: 0.9569 - loss: 0.1644 - val_accuracy: 0.8945 - val_loss: 0.3437
Epoch 3/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m389s[0m 716ms/step - accuracy: 0.9855 - loss: 0.0858 - val_accuracy: 0.8995 - val_loss: 0.3314
Epoch 4/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 765ms/step - accuracy: 0.9937 - loss: 0.0526 - val_accuracy: 0.9026 - val_loss: 0.3263
Epoch 5/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m411s[0m 757ms/step - accuracy: 0.9974 - loss: 0.0349 - val_accuracy: 0.9026 - val_loss: 0.3241
Epoch 6/15
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 724ms/step - accuracy: 0.9996 - loss: 0.0242 - val_accuracy: 0.9064 - val_loss: 0.3237
Epoc