In [2]:
import numpy as np
import cv2
from pathlib import Path
import PIL.Image as Image
import os,glob
from tqdm.notebook import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pylab as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    GlobalAveragePooling2D, GlobalMaxPooling2D,
    Reshape, Dense, Conv2D, Multiply, Add, Activation,
    Concatenate, BatchNormalization, ReLU
)

In [3]:
base_dir = Path('plant-village')
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32 


In [30]:
all_image_paths = []
all_image_labels = []

print("Collecting image paths and labels...")
for class_dir in tqdm(base_dir.iterdir(), desc="Classes"):
    if class_dir.is_dir():
        label = class_dir.name
        for img_path in class_dir.glob("*.jpg"):
            all_image_paths.append(str(img_path))
            all_image_labels.append(label)

print(f"Found {len(all_image_paths)} images across {len(set(all_image_labels))} classes.")



Collecting image paths and labels...


Classes: 0it [00:00, ?it/s]

Found 10835 images across 9 classes.


In [31]:
le = preprocessing.LabelEncoder()
all_image_labels_encoded = le.fit_transform(all_image_labels)

print("Label order used by model:")
for idx, label in enumerate(le.classes_):
    print(f"{idx}: {label}")



Label order used by model:
0: Pepper__bell___Bacterial_spot
1: Pepper__bell___healthy
2: Potato___Early_blight
3: Potato___Late_blight
4: Potato___healthy
5: Tomato_Leaf_Mold
6: Tomato_Spider_mites_Two_spotted_spider_mite
7: Tomato__Tomato_YellowLeaf__Curl_Virus
8: Tomato__Tomato_mosaic_virus


In [32]:
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_image_paths, all_image_labels_encoded, test_size=0.33, random_state=0, stratify=all_image_labels_encoded
)

print(f"Train samples: {len(train_paths)}")
print(f"Test samples: {len(test_paths)}")


Train samples: 7259
Test samples: 3576


In [33]:
def preprocess_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3, dct_method='INTEGER_ACCURATE')
    image = tf.image.resize(image, [IMG_HEIGHT, IMG_WIDTH], method=tf.image.ResizeMethod.BILINEAR)
    image = tf.cast(image, tf.float32) / 255.0
    image.set_shape([IMG_HEIGHT, IMG_WIDTH, 3])  
    return image, label

In [34]:
def augment_image(image, label):
    image = tf.image.random_flip_left_right(image)
    
    return image, label


In [35]:

train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.shuffle(len(train_paths))
train_ds = train_ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.cache(r"D:\Desktop\tf_cache/train_cache.tf-data").repeat()   
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_ds = test_ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)




In [11]:
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False 


In [12]:
def se_block(input_tensor, ratio=8):
    """Squeeze-and-Excitation block."""
    channel = input_tensor.shape[-1]
    se = GlobalAveragePooling2D()(input_tensor)
    se = Reshape((1, 1, channel))(se)
    se = Dense(channel // ratio, activation='relu', use_bias=False)(se)
    se = Dense(channel, activation='sigmoid', use_bias=False)(se)
    return Multiply()([input_tensor, se])  # Squeeze and excite output


In [13]:
def cbam_block_with_outputs(input_feature, ratio=8):
    channel = input_feature.shape[-1]

    
    se_refined = se_block(input_feature, ratio=ratio)

    # channel attention
    shared_dense_one = Dense(channel // ratio, activation='relu', name='cbam_dense_1')
    shared_dense_two = Dense(channel, name='cbam_dense_2')

    avg_pool = GlobalAveragePooling2D()(se_refined)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    avg_out = shared_dense_two(shared_dense_one(avg_pool))

    max_pool = GlobalMaxPooling2D()(se_refined)
    max_pool = Reshape((1, 1, channel))(max_pool)
    max_out = shared_dense_two(shared_dense_one(max_pool))

    channel_att = Activation('sigmoid')(Add()([avg_out, max_out]))
    channel_refined = Multiply()([se_refined, channel_att])  # (H, W, C)

    # Spatial attention
    avg_sp = tf.reduce_mean(channel_refined, axis=-1, keepdims=True)
    max_sp = tf.reduce_max(channel_refined, axis=-1, keepdims=True)
    concat = Concatenate()([avg_sp, max_sp])
    spatial_att = Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')(concat)

    spatial_refined = Multiply()([channel_refined, spatial_att])  # (H, W, C)

    return spatial_refined, channel_att, spatial_att


In [14]:
def build_cbam_model(num_classes):
    input_tensor = tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

    # Use MobileNetV2 base
    x = base_model(input_tensor, training=False)  # Shape: (7, 7, 1280)

    # Apply CBAM
    channel = x.shape[-1]
    cbam_out, channel_att_map, spatial_att_map = cbam_block_with_outputs(x)

    # Pool + classify
    x = GlobalAveragePooling2D()(cbam_out)  # shape: (None, 1280)
    predictions = Dense(num_classes, activation='softmax', name='classification')(x)

    # Flatten channel attention for output
    # This output is likely for visualization/analysis, not directly for loss calculation
    channel_vector = Reshape((channel,))(channel_att_map)  # (None, 1280)
    spatial_map = spatial_att_map  # (None, 7, 7, 1)

    model = tf.keras.Model(inputs=input_tensor, outputs=[predictions, channel_vector, spatial_map])
    return model

In [36]:
num_classes = len(le.classes_)
model = build_cbam_model(num_classes=num_classes)

model.compile(
    optimizer='adam',
    loss={'classification':'sparse_categorical_crossentropy'},  
    metrics={'classification':'accuracy'}
)

epochs = 30


print("Starting model training...")
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=test_ds,
    steps_per_epoch=len(train_paths) // BATCH_SIZE,
    validation_steps=len(test_paths) // BATCH_SIZE

)

print("Training complete.")


Starting model training...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training complete.


In [38]:
model.save("../saved_models/cbam_with_attention_outputs_se_tfdataiiD")

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("rcbam_attention_tfdata.tflite", "wb") as f:
    f.write(tflite_model)

print("Model saved and converted to TFLite.")


INFO:tensorflow:Assets written to: ../saved_models/cbam_with_attention_outputs_se_tfdataiiD\assets


INFO:tensorflow:Assets written to: ../saved_models/cbam_with_attention_outputs_se_tfdataiiD\assets


INFO:tensorflow:Assets written to: C:\Users\kariu\AppData\Local\Temp\tmp93wheaz7\assets


INFO:tensorflow:Assets written to: C:\Users\kariu\AppData\Local\Temp\tmp93wheaz7\assets


Model saved and converted to TFLite.


In [37]:
from sklearn.metrics import confusion_matrix, classification_report

print("Making predictions on the test set...")
# Get predictions (only the classification output)
y_pred_probs, _, _ = model.predict(test_ds)
y_pred = y_pred_probs.argmax(axis=1)

# Extract true labels from the test_ds for evaluation
# This requires iterating through the dataset, which might be slow for very large datasets
# If test_labels (from train_test_split) is reliable, use that directly.
y_true_list = []
for images, labels in test_ds:
    y_true_list.extend(labels.numpy())
y_true = np.array(y_true_list)

print("Confusion Matrix:")
cm = confusion_matrix(y_true, y_pred)
print(cm)

print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=le.classes_, digits=4))

Making predictions on the test set...
Confusion Matrix:
[[ 323    4    0    0    0    0    1    1    0]
 [   5  483    0    0    0    0    0    0    0]
 [   0    0  323    7    0    0    0    0    0]
 [   0    0    6  316    7    0    1    0    0]
 [   0    0    0    0   50    0    0    0    0]
 [   0    0    0    0    0  306    2    3    3]
 [   0    2    0    0    0   11  530    6    4]
 [   3    0    0    0    0    2    4 1049    1]
 [   0    0    0    0    0    2    0    0  121]]

Classification Report:
                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot     0.9758    0.9818    0.9788       329
                     Pepper__bell___healthy     0.9877    0.9898    0.9887       488
                      Potato___Early_blight     0.9818    0.9788    0.9803       330
                       Potato___Late_blight     0.9783    0.9576    0.9678       330
                           Potato___healthy     0.8772    1.0

In [1]:
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="cbam_attention_tfdata.tflite")
interpreter.allocate_tensors()

output_details = interpreter.get_output_details()

print("Output Details:")
for detail in output_details:
    print(f"  Index: {detail['index']}, Name: {detail['name']}, Shape: {detail['shape']}, Dtype: {detail['dtype']}")


Output Details:
  Index: 284, Name: StatefulPartitionedCall:1, Shape: [1 7 7 1], Dtype: <class 'numpy.float32'>
  Index: 278, Name: StatefulPartitionedCall:2, Shape: [   1 1280], Dtype: <class 'numpy.float32'>
  Index: 288, Name: StatefulPartitionedCall:0, Shape: [ 1 15], Dtype: <class 'numpy.float32'>


In [1]:
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="cbam_attention_tfdata.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Inputs:")
for d in input_details:
    print(f"Index: {d['index']}, Shape: {d['shape']}, DType: {d['dtype']}")

print("\nOutputs:")
for d in output_details:
    print(f"Index: {d['index']}, Shape: {d['shape']}, DType: {d['dtype']}")


Inputs:
Index: 0, Shape: [  1 224 224   3], DType: <class 'numpy.float32'>

Outputs:
Index: 284, Shape: [1 7 7 1], DType: <class 'numpy.float32'>
Index: 278, Shape: [   1 1280], DType: <class 'numpy.float32'>
Index: 288, Shape: [ 1 15], DType: <class 'numpy.float32'>
