In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import shutil
from collections import Counter

In [4]:
# Paths to datasets
train_dir = "D:\PFE\D2\wdd2017\Training"
val_dir = "D:\PFE\D2\wdd2017\Validation"
test_dir = "D:\PFE\D2\wdd2017\Testing"

In [5]:
# Directories for balanced datasets
balanced_train_dir = "balanced_train_data"
balanced_val_dir = "balanced_val_data"

# Ensure output directories exist
os.makedirs(balanced_train_dir, exist_ok=True)
os.makedirs(balanced_val_dir, exist_ok=True)

In [6]:
# Analyze class distribution function
def analyze_class_distribution(directory):
    """
    Counts the number of images in each class for a given directory.
    """
    class_counts = {}
    for class_label in os.listdir(directory):
        class_dir = os.path.join(directory, class_label)
        if os.path.isdir(class_dir):
            class_counts[class_label] = len(os.listdir(class_dir))
    return Counter(class_counts)

In [7]:
# Analyze datasets
train_class_counts = analyze_class_distribution(train_dir)
val_class_counts = analyze_class_distribution(val_dir)

print("Original Training Class Counts:", train_class_counts)
print("Original Validation Class Counts:", val_class_counts)

# Find the maximum class count (target balance)
max_count = max(train_class_counts.values())
print(f"Target number of images per class: {max_count}")

Original Training Class Counts: Counter({'Healthy': 1104, 'Yellow_Rust': 900, 'Brown_Rust': 890, 'Loose_Smut': 700, 'Septoria': 280})
Original Validation Class Counts: Counter({'Healthy': 312, 'Yellow_Rust': 300, 'Brown_Rust': 200, 'Loose_Smut': 140, 'Septoria': 35})
Target number of images per class: 1104


In [8]:
# Find the maximum class count (target balance)
max_count = max(train_class_counts.values())
print(f"Target number of images per class: {max_count}")

# Augmentation generator for balancing underrepresented classes
augmentation_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

Target number of images per class: 1104


In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import shutil

# Augmentation generator
augmentation_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

def balance_dataset(source_dir, target_dir, class_counts, max_count, augment=False):
    """
    Balances a dataset by copying existing images and optionally augmenting underrepresented classes.
    """
    for class_label, count in class_counts.items():
        src_class_dir = os.path.join(source_dir, class_label)
        dst_class_dir = os.path.join(target_dir, class_label)
        os.makedirs(dst_class_dir, exist_ok=True)

        # Copy existing images
        for file in os.listdir(src_class_dir):
            shutil.copy(os.path.join(src_class_dir, file), dst_class_dir)

        # Augment images if needed
        if augment and count < max_count:
            augmentation_target = max_count - count
            print(f"Augmenting {augmentation_target} images for class {class_label}...")

            # Augment images
            augmented_count = 0
            # Create a flow for augmenting individual images
            for img_file in os.listdir(src_class_dir):
                if augmented_count >= augmentation_target:
                    break

                img_path = os.path.join(src_class_dir, img_file)
                # Open the image to augment
                img = load_img(img_path)
                x = img_to_array(img)  # Convert image to array
                x = x.reshape((1,) + x.shape)  # Reshape for flow

                # Create a generator that will save augmented images
                i = 0
                for batch in augmentation_datagen.flow(x, batch_size=1, save_to_dir=dst_class_dir,
                                                       save_prefix="aug", save_format="jpeg"):
                    i += 1
                    if i >= 1:  # Only one augmentation per image
                        break
                augmented_count += 1

    print(f"Dataset balanced successfully in: {target_dir}")

# Balance training and validation datasets
balance_dataset(train_dir, balanced_train_dir, train_class_counts, max_count, augment=True)
balance_dataset(val_dir, balanced_val_dir, val_class_counts, max_count, augment=True)


Augmenting 214 images for class Brown_Rust...
Augmenting 404 images for class Loose_Smut...
Augmenting 824 images for class Septoria...
Augmenting 204 images for class Yellow_Rust...
Dataset balanced successfully in: balanced_train_data
Augmenting 904 images for class Brown_Rust...
Augmenting 792 images for class Healthy...
Augmenting 964 images for class Loose_Smut...
Augmenting 1069 images for class Septoria...
Augmenting 804 images for class Yellow_Rust...
Dataset balanced successfully in: balanced_val_data


In [12]:
# Check balanced datasets
def check_balanced_distribution(directory):
    """
    Checks and prints the class distribution in the balanced dataset.
    """
    balanced_counts = analyze_class_distribution(directory)
    print(f"Class counts in {directory}: {balanced_counts}")
    return balanced_counts

# Check distributions
train_balanced_counts = check_balanced_distribution(balanced_train_dir)
val_balanced_counts = check_balanced_distribution(balanced_val_dir)

Class counts in balanced_train_data: Counter({'Healthy': 1104, 'Brown_Rust': 1103, 'Yellow_Rust': 1101, 'Loose_Smut': 1099, 'Septoria': 557})
Class counts in balanced_val_data: Counter({'Healthy': 618, 'Yellow_Rust': 594, 'Brown_Rust': 399, 'Loose_Smut': 279, 'Septoria': 70})


In [14]:
# Check distributions
train_balanced_counts = check_balanced_distribution(balanced_train_dir)
val_balanced_counts = check_balanced_distribution(balanced_val_dir)

# Set up the ImageDataGenerators for training and validation sets
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Normalize pixel values to [0, 1]
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Load training and validation data
train_generator = train_datagen.flow_from_directory(
    balanced_train_dir,
    target_size=(224, 224),  # Resize images to match model input size
    batch_size=32,
    class_mode='categorical'  # Assuming a classification task
)

validation_generator = val_datagen.flow_from_directory(
    balanced_val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)



Class counts in balanced_train_data: Counter({'Healthy': 1104, 'Brown_Rust': 1103, 'Yellow_Rust': 1101, 'Loose_Smut': 1099, 'Septoria': 557})
Class counts in balanced_val_data: Counter({'Healthy': 618, 'Yellow_Rust': 594, 'Brown_Rust': 399, 'Loose_Smut': 279, 'Septoria': 70})
Found 4931 images belonging to 5 classes.
Found 1960 images belonging to 5 classes.


In [16]:
# Build the CNN model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras import layers, models
import os
import shutil
from tensorflow.keras.optimizers import Adam

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(train_generator.class_indices), activation='softmax')  # Output layer
])

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=30,  # Adjust the number of epochs based on your requirements
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size
)

# Evaluate the model
loss, accuracy = model.evaluate(validation_generator)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Save the trained model
model.save('wheat_disease_detection_model.h5')

  self._warn_if_super_not_called()


Epoch 1/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m749s[0m 5s/step - accuracy: 0.4766 - loss: 1.2934 - val_accuracy: 0.8007 - val_loss: 0.6369
Epoch 2/30
[1m  1/154[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:44[0m 4s/step - accuracy: 0.6562 - loss: 0.9145



[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 341ms/step - accuracy: 0.6562 - loss: 0.9145 - val_accuracy: 0.7587 - val_loss: 0.7694
Epoch 3/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m486s[0m 3s/step - accuracy: 0.7624 - loss: 0.6658 - val_accuracy: 0.7941 - val_loss: 0.7567
Epoch 4/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 309ms/step - accuracy: 0.9375 - loss: 0.3487 - val_accuracy: 0.7418 - val_loss: 0.9019
Epoch 5/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m566s[0m 4s/step - accuracy: 0.8142 - loss: 0.5343 - val_accuracy: 0.8509 - val_loss: 0.6499
Epoch 6/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 315ms/step - accuracy: 0.9375 - loss: 0.2866 - val_accuracy: 0.8571 - val_loss: 0.6658
Epoch 7/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m483s[0m 3s/step - accuracy: 0.8321 - loss: 0.4726 - val_accuracy: 0.7818 - val_loss: 1.1118
Epoch 8/30
[1m154/154[0m 



Validation Loss: 0.9671390056610107
Validation Accuracy: 0.8668367266654968


In [44]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


  self._warn_if_super_not_called()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step - accuracy: 0.8946 - loss: 0.3500
Test Loss: 0.6083817481994629
Test Accuracy: 0.811965823173523


In [19]:
# Create a test data generator
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Load test data
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),  # Resize images to match model input size
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Important: Do not shuffle test data
)


Found 693 images belonging to 5 classes.


In [22]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 4s/step - accuracy: 0.4500 - loss: 4.3689
Test Loss: 6.886234760284424
Test Accuracy: 0.48917749524116516


In [42]:
test_dir = "D:\PFE\Datasets\Testing"

In [39]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - accuracy: 0.8897 - loss: 0.4025
Test Loss: 0.6879528760910034
Test Accuracy: 0.805084764957428


In [43]:
# Create a test data generator
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Load test data
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),  # Resize images to match model input size
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Important: Do not shuffle test data
)


Found 117 images belonging to 5 classes.


In [38]:
# Get predictions for the test set
predictions = model.predict(test_generator)

# Convert predictions to class indices
predicted_classes = predictions.argmax(axis=1)

# Get the true class labels
true_classes = test_generator.classes

# Compare predicted vs true labels
from sklearn.metrics import classification_report, confusion_matrix

print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=test_generator.class_indices.keys()))

print("Confusion Matrix:")
print(confusion_matrix(true_classes, predicted_classes))


  self._warn_if_super_not_called()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step
Classification Report:


ValueError: Number of classes, 4, does not match size of target_names, 5. Try specifying the labels parameter