In [1]:
from imblearn.over_sampling import SMOTE
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import cv2
import os


In [2]:
# Load dataset
def load_images_and_labels(dataset_path, img_size=(224, 224)):
    images, labels = [], []
    for label_dir in os.listdir(dataset_path):
        for img_file in os.listdir(os.path.join(dataset_path, label_dir)):
            img_path = os.path.join(dataset_path, label_dir, img_file)
            image = cv2.imread(img_path)
            image = cv2.resize(image, img_size)
            images.append(image)
            labels.append(label_dir)
    return np.array(images), np.array(labels)

images, labels = load_images_and_labels("./wheat_leaf")
images = images / 255.0  # Normalize the images to [0, 1] range

In [3]:
# Encode labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)


In [4]:
# Split into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(images, labels, test_size=0.3, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [6]:
from imblearn.combine import SMOTETomek

# Check class distribution in the training set
from collections import Counter
class_distribution = Counter(np.argmax(y_train, axis=1))
print("Class distribution in the training set:", class_distribution)

# Apply SMOTETomek for oversampling the minority class and cleaning noisy samples
X_train_flattened = X_train.reshape(X_train.shape[0], -1)  # Flatten images for SMOTETomek
smotetomek = SMOTETomek(random_state=42)
X_resampled, y_resampled = smotetomek.fit_resample(X_train_flattened, y_train)

# Reshape back to image dimensions after SMOTE
X_resampled = X_resampled.reshape(-1, 224, 224, 3)


Class distribution in the training set: Counter({2: 150, 1: 70, 0: 64})


In [7]:
# Build the model using MobileNetV2
mobilenet = MobileNetV2(input_shape=(224, 224, 3), weights="imagenet", include_top=False)
mobilenet.trainable = False  # Freeze the MobileNetV2 layers


In [8]:
model = models.Sequential([
    mobilenet,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),  # Dense layer for feature learning
    layers.Dense(len(lb.classes_), activation='softmax')  # Output layer for classification
])


In [9]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


In [10]:
# Train the model
model.fit(X_resampled, y_resampled, epochs=20, validation_data=(X_valid, y_valid))

Epoch 1/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.7044 - loss: 0.6352 - val_accuracy: 0.9180 - val_loss: 0.2777
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 553ms/step - accuracy: 0.9449 - loss: 0.1487 - val_accuracy: 0.9508 - val_loss: 0.2038
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 565ms/step - accuracy: 0.9945 - loss: 0.0412 - val_accuracy: 0.9672 - val_loss: 0.1490
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 529ms/step - accuracy: 0.9924 - loss: 0.0246 - val_accuracy: 0.8852 - val_loss: 0.2849
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 524ms/step - accuracy: 0.9898 - loss: 0.0481 - val_accuracy: 0.9672 - val_loss: 0.1146
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 552ms/step - accuracy: 1.0000 - loss: 0.0117 - val_accuracy: 0.9344 - val_loss: 0.1309
Epoch 7/20
[1m15/15[0m [32m

<keras.src.callbacks.history.History at 0x199045370b0>

In [11]:

# Evaluate on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test accuracy:", test_acc)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step - accuracy: 0.9254 - loss: 0.1849
Test accuracy: 0.9193548560142517


In [12]:
# Generate predictions
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3s/step


In [15]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Confusion Matrix and Classification Report
conf_matrix = confusion_matrix(y_test_classes, y_pred_classes)
print("Confusion Matrix:")
print(conf_matrix)

print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes, target_names=lb.classes_))


Confusion Matrix:
[[16  0  2]
 [ 0 12  1]
 [ 1  1 29]]
Classification Report:
              precision    recall  f1-score   support

     Healthy       0.94      0.89      0.91        18
    septoria       0.92      0.92      0.92        13
 stripe_rust       0.91      0.94      0.92        31

    accuracy                           0.92        62
   macro avg       0.92      0.92      0.92        62
weighted avg       0.92      0.92      0.92        62

