In [1]:
import numpy as np
import cv2
from pathlib import Path
import PIL.Image as Image
import os,glob
from tqdm.notebook import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pylab as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    GlobalAveragePooling2D, GlobalMaxPooling2D,
    Reshape, Dense, Conv2D, Multiply, Add, Activation,
    Concatenate, BatchNormalization, ReLU
)

In [2]:
base_dir = Path('plant-village')
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32 


In [3]:
all_image_paths = []
all_image_labels = []

print("Collecting image paths and labels...")
for class_dir in tqdm(base_dir.iterdir(), desc="Classes"):
    if class_dir.is_dir():
        label = class_dir.name
        for img_path in class_dir.glob("*.jpg"):
            all_image_paths.append(str(img_path))
            all_image_labels.append(label)

print(f"Found {len(all_image_paths)} images across {len(set(all_image_labels))} classes.")



Collecting image paths and labels...


Classes: 0it [00:00, ?it/s]

Found 2152 images across 3 classes.


In [4]:
le = preprocessing.LabelEncoder()
all_image_labels_encoded = le.fit_transform(all_image_labels)

print("Label order used by model:")
for idx, label in enumerate(le.classes_):
    print(f"{idx}: {label}")



Label order used by model:
0: Potato___Early_blight
1: Potato___Late_blight
2: Potato___healthy


In [5]:
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_image_paths, all_image_labels_encoded, test_size=0.33, random_state=0, stratify=all_image_labels_encoded
)

print(f"Train samples: {len(train_paths)}")
print(f"Test samples: {len(test_paths)}")


Train samples: 1441
Test samples: 711


In [6]:
def preprocess_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3) 
    image = tf.image.resize(image, [IMG_HEIGHT, IMG_WIDTH])
    image = image / 255.0  
    return image, label


In [15]:
def augment_image(image, label):
    # Apply random horizontal flip
    image = tf.image.random_flip_left_right(image)

    # Apply random brightness adjustment
    image = tf.image.random_brightness(image, max_delta=0.2) # Adjust brightness by up to +/- 20%

    # Apply random contrast adjustment
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2) # Adjust contrast by 80% to 120%

    # Optionally, apply random zoom or rotation using Keras preprocessing layers
    # (These are typically better applied as part of the Keras model's first layers,
    # or you'd need to create them as standalone callables for tf.data.Dataset.map)

    return image, label


In [16]:
# Create tf.data.Dataset for training and testing
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
# Apply augmentation ONLY to the training dataset
train_ds = train_ds.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE) # Add this line
train_ds = train_ds.shuffle(buffer_size=len(train_paths))
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_ds = test_ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(buffer_size=tf.data.AUTOTUNE)



In [17]:
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False # Freeze the base model weights


In [18]:
def se_block(input_tensor, ratio=8):
    """Squeeze-and-Excitation block."""
    channel = input_tensor.shape[-1]
    se = GlobalAveragePooling2D()(input_tensor)
    se = Reshape((1, 1, channel))(se)
    se = Dense(channel // ratio, activation='relu', use_bias=False)(se)
    se = Dense(channel, activation='sigmoid', use_bias=False)(se)
    return Multiply()([input_tensor, se])  # Squeeze and excite output


In [19]:
def cbam_block_with_outputs(input_feature, ratio=8):
    channel = input_feature.shape[-1]

    # 1. Apply SE block first (preconditioning) - as in your original code
    se_refined = se_block(input_feature, ratio=ratio)

    # 2. Channel attention (CBAM)
    shared_dense_one = Dense(channel // ratio, activation='relu', name='cbam_dense_1')
    shared_dense_two = Dense(channel, name='cbam_dense_2')

    avg_pool = GlobalAveragePooling2D()(se_refined)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    avg_out = shared_dense_two(shared_dense_one(avg_pool))

    max_pool = GlobalMaxPooling2D()(se_refined)
    max_pool = Reshape((1, 1, channel))(max_pool)
    max_out = shared_dense_two(shared_dense_one(max_pool))

    channel_att = Activation('sigmoid')(Add()([avg_out, max_out]))
    channel_refined = Multiply()([se_refined, channel_att])  # (H, W, C)

    # 3. Spatial attention (CBAM)
    avg_sp = tf.reduce_mean(channel_refined, axis=-1, keepdims=True)
    max_sp = tf.reduce_max(channel_refined, axis=-1, keepdims=True)
    concat = Concatenate()([avg_sp, max_sp])
    spatial_att = Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')(concat)

    spatial_refined = Multiply()([channel_refined, spatial_att])  # (H, W, C)

    return spatial_refined, channel_att, spatial_att


In [20]:
def build_cbam_model(num_classes):
    input_tensor = tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

    # Use MobileNetV2 base
    x = base_model(input_tensor, training=False)  # Shape: (7, 7, 1280)

    # Apply CBAM
    channel = x.shape[-1]
    cbam_out, channel_att_map, spatial_att_map = cbam_block_with_outputs(x)

    # Pool + classify
    x = GlobalAveragePooling2D()(cbam_out)  # shape: (None, 1280)
    predictions = Dense(num_classes, activation='softmax', name='classification')(x)

    # Flatten channel attention for output
    # This output is likely for visualization/analysis, not directly for loss calculation
    channel_vector = Reshape((channel,))(channel_att_map)  # (None, 1280)
    spatial_map = spatial_att_map  # (None, 7, 7, 1)

    model = tf.keras.Model(inputs=input_tensor, outputs=[predictions, channel_vector, spatial_map])
    return model

In [12]:
num_classes = len(le.classes_)
model = build_cbam_model(num_classes=num_classes)

model.compile(
    optimizer='adam',
    loss={'classification':'sparse_categorical_crossentropy'},  
    metrics={'classification':'accuracy'}
)

epochs = 10 

print("Starting model training...")
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=test_ds
)

print("Training complete.")


Starting model training...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training complete.


In [13]:
model.save("../saved_models/cbam_with_attention_outputs_se_tfdata")

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("cbam_attention_tfdata.tflite", "wb") as f:
    f.write(tflite_model)

print("Model saved and converted to TFLite.")


INFO:tensorflow:Assets written to: ../saved_models/cbam_with_attention_outputs_se_tfdata\assets


INFO:tensorflow:Assets written to: ../saved_models/cbam_with_attention_outputs_se_tfdata\assets


INFO:tensorflow:Assets written to: C:\Users\kariu\AppData\Local\Temp\tmpnlfo4d9t\assets


INFO:tensorflow:Assets written to: C:\Users\kariu\AppData\Local\Temp\tmpnlfo4d9t\assets


Model saved and converted to TFLite.


In [14]:
from sklearn.metrics import confusion_matrix, classification_report

print("Making predictions on the test set...")
# Get predictions (only the classification output)
y_pred_probs, _, _ = model.predict(test_ds)
y_pred = y_pred_probs.argmax(axis=1)

# Extract true labels from the test_ds for evaluation
# This requires iterating through the dataset, which might be slow for very large datasets
# If test_labels (from train_test_split) is reliable, use that directly.
y_true_list = []
for images, labels in test_ds:
    y_true_list.extend(labels.numpy())
y_true = np.array(y_true_list)

print("Confusion Matrix:")
cm = confusion_matrix(y_true, y_pred)
print(cm)

print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=le.classes_, digits=4))

Making predictions on the test set...
Confusion Matrix:
[[329   2   0]
 [  6 307  17]
 [  0   0  50]]

Classification Report:
                       precision    recall  f1-score   support

Potato___Early_blight     0.9821    0.9940    0.9880       331
 Potato___Late_blight     0.9935    0.9303    0.9609       330
     Potato___healthy     0.7463    1.0000    0.8547        50

             accuracy                         0.9648       711
            macro avg     0.9073    0.9748    0.9345       711
         weighted avg     0.9708    0.9648    0.9660       711

