<a href="https://colab.research.google.com/github/ganjire/ML2_Project/blob/main/ML2_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Project Idea: Automated Disease Detection in Plant Leaves using Convolutional Neural Networks (CNNs)**

**Project Goal/Motivation**

The goal of this project is to develop an automated system that can identify and classify plant diseases from images of plant leaves. This is crucial for agricultural technology as early detection of diseases can lead to timely intervention, reducing both the spread of disease and economic losses. This project will help in understanding the practical application of CNNs in a real-world problem, exploring various architectures, and utilizing transfer learning for effective model performance on a specialized task.

In [None]:
import numpy as np
import pickle
import cv2
from os import listdir
import os
import tensorflow as tf
import keras
from sklearn.preprocessing import LabelBinarizer
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder



from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


# **Data Collection and Preparation**

In [None]:
# Mount Google Drive so the PlantVillage dataset stored there can be read.
# Nothing is downloaded here: the dataset (e.g. obtained from Kaggle) must already
# have been uploaded to 'My Drive/PlantVillage'.
# drive.mount() returns None, so directory_root is set to the mount point itself
# instead of being assigned the (always empty) return value of the call.
directory_root = '/content/drive'
drive.mount(directory_root)
dataset_root = '/content/drive/My Drive/PlantVillage'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

data_path = '/content/drive/My Drive/PlantVillage'  # dataset root on the mounted Drive

# Each entry directly below the dataset root is treated as one class
categories = os.listdir(data_path)
print("Categories (Classes):", categories)

# Optional overview: how many files each class folder contains
category_paths = [os.path.join(data_path, category) for category in categories]
for category, category_path in zip(categories, category_paths):
    num_images = len(os.listdir(category_path))
    print(f"{category}: {num_images} images")


Categories (Classes): ['Tomato__Target_Spot', 'Potato___Early_blight', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato_healthy', 'Tomato__Tomato_mosaic_virus', 'Tomato_Early_blight', 'Potato___Late_blight', 'Tomato_Leaf_Mold', 'Tomato_Septoria_leaf_spot', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato_Bacterial_spot', 'Pepper__bell___Bacterial_spot', 'Tomato_Late_blight', 'Potato___healthy', 'Pepper__bell___healthy']
Tomato__Target_Spot: 703 images
Potato___Early_blight: 500 images
Tomato__Tomato_YellowLeaf__Curl_Virus: 1605 images
Tomato_healthy: 799 images
Tomato__Tomato_mosaic_virus: 190 images
Tomato_Early_blight: 504 images
Potato___Late_blight: 500 images
Tomato_Leaf_Mold: 481 images
Tomato_Septoria_leaf_spot: 886 images
Tomato_Spider_mites_Two_spotted_spider_mite: 838 images
Tomato_Bacterial_spot: 1069 images
Pepper__bell___Bacterial_spot: 499 images
Tomato_Late_blight: 955 images
Potato___healthy: 76 images
Pepper__bell___healthy: 743 images


In [None]:
from PIL import Image
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder

def load_images_and_labels(data_directory):
    """Load all three-channel images below ``data_directory`` into memory.

    Every sub-directory of ``data_directory`` is treated as one class. Each
    image is resized to 128x128 pixels; images whose resized array is not
    of shape (128, 128, 3) are skipped. Files that cannot be opened are
    reported on stdout and skipped.

    Args:
        data_directory: Path of the folder that contains one sub-folder per class.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` stacks the
        loaded pixel arrays and ``labels`` holds, per image, the integer that
        ``LabelEncoder`` assigned to its class folder name.
    """
    # os.listdir() returns entries in arbitrary order, so sort them to keep the
    # sample order (and therefore any seeded split) reproducible. Plain files in
    # the dataset root are skipped; listing them as folders would raise.
    categories = sorted(
        entry for entry in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, entry))
    )

    # Encode every category exactly once instead of once per image
    label_encoder = LabelEncoder()
    category_ids = label_encoder.fit_transform(categories)

    images = []
    labels = []
    for category, category_id in zip(categories, category_ids):
        category_path = os.path.join(data_directory, category)
        for image_file in sorted(os.listdir(category_path)):
            image_path = os.path.join(category_path, image_file)
            try:
                with Image.open(image_path) as img:
                    pixels = np.array(img.resize((128, 128)))  # Resize the image
            except Exception as e:
                print(f"Can't load image {image_path}: {e}")
                continue
            if pixels.shape == (128, 128, 3):  # Keep three-channel images only
                images.append(pixels)
                labels.append(category_id)

    return np.array(images), np.array(labels)

# Point data_path at the 'PlantVillage' folder and load the whole dataset into memory
data_path = '/content/drive/My Drive/PlantVillage'
images, labels = load_images_and_labels(data_path)

# Report how many samples and labels were loaded
for loaded, noun in ((images, "images"), (labels, "labels")):
    print(f"Loaded {len(loaded)} {noun}.")


Can't load image /content/drive/My Drive/PlantVillage/Tomato__Tomato_YellowLeaf__Curl_Virus/svn-r6Yb5c: cannot identify image file '/content/drive/My Drive/PlantVillage/Tomato__Tomato_YellowLeaf__Curl_Virus/svn-r6Yb5c'


KeyboardInterrupt: 

**Load images and labels from directory**

In [None]:
from PIL import Image
import numpy as np

def load_images_and_labels(data_directory):
    """Load all three-channel images below ``data_directory`` into memory.

    Every sub-directory of ``data_directory`` is treated as one class and is
    assigned an integer label equal to its position in the alphabetically
    sorted list of class folders. Each image is resized to 128x128 pixels;
    images whose resized array is not of shape (128, 128, 3) are skipped.
    Files that cannot be opened are reported on stdout and skipped.

    NOTE: this definition shadows the LabelEncoder-based function of the same
    name defined in an earlier cell.

    Args:
        data_directory: Path of the folder that contains one sub-folder per class.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the stacked pixel arrays
        and the integer class label of each image.
    """
    # os.listdir() returns entries in arbitrary order. Without sorting, the
    # class -> integer mapping (and the sample order used by the seeded split)
    # could differ between sessions. Plain files in the dataset root are
    # skipped; listing them as folders would raise.
    categories = sorted(
        entry for entry in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, entry))
    )

    images = []
    labels = []
    for label, category in enumerate(categories):
        category_path = os.path.join(data_directory, category)
        for image_file in sorted(os.listdir(category_path)):
            image_path = os.path.join(category_path, image_file)
            try:
                with Image.open(image_path) as img:
                    pixels = np.array(img.resize((128, 128)))
            except Exception as e:
                print(f"Can't load image {image_path}: {e}")
                continue
            if pixels.shape == (128, 128, 3):  # keep three-channel images only
                images.append(pixels)
                labels.append(label)

    return np.array(images), np.array(labels)

# Load the full dataset into memory and report how many samples were kept
images, labels = load_images_and_labels(data_path)
# Every kept image must have exactly one label
assert len(images) == len(labels), "images and labels are out of sync"
print(f"Loaded Images: {len(images)}")
print(f"Loaded Labels: {len(labels)}")  # previously mislabelled as "Loaded Images"


Can't load image /content/drive/My Drive/PlantVillage/Tomato__Tomato_YellowLeaf__Curl_Virus/svn-r6Yb5c: cannot identify image file '/content/drive/My Drive/PlantVillage/Tomato__Tomato_YellowLeaf__Curl_Virus/svn-r6Yb5c'
Loaded Images: 10347
Loaded Images: 10347


In [None]:
# Tally how many samples carry each integer class label
label_summary = np.unique(labels, return_counts=True)
unique_labels, counts = label_summary
print("Unique labels (classes):", unique_labels)
print("Number of images per class:", counts)


Unique labels (classes): [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
Number of images per class: [ 703  500 1604  799  190  504  500  481  886  838 1069  499  955   76
  743]


In [None]:
# train_test_split was used without ever being imported, which raises a
# NameError on a fresh kernel.
from sklearn.model_selection import train_test_split

# First split: 70% training data, remaining 30% held out for validation + test.
# Stratifying keeps the class proportions equal in both parts.
X_train, X_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.3, random_state=42, stratify=labels)

# Second split of the held-out 30%: validation data (2/3 of it -> 20% overall)
# and test data (1/3 of it -> 10% overall).
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=1/3, random_state=42, stratify=y_temp)


In [None]:
# Assuming X_train, y_train, X_valid, y_valid, X_test, y_test are already defined

# Create generators for training and validation data
#train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
#valid_generator = test_datagen.flow(X_valid, y_valid, batch_size=32)
#test_generator = test_datagen.flow(X_test, y_test, batch_size=32)


**Data Augmentation**

In [None]:
# Define the ImageDataGenerator for training with augmentation
#train_datagen = ImageDataGenerator(
  #  rescale=1./255,  # Normalize the image data to [0, 1]
   # rotation_range=40,  # Randomly rotate images in the range (degrees, 0 to 180)
  #  width_shift_range=0.2,  # Randomly horizontal shift images
 #   height_shift_range=0.2,  # Randomly vertical shift images
#    shear_range=0.2,  # Randomly apply shearing transformations
    #zoom_range=0.2,  # Randomly zoom image
   # horizontal_flip=True,  # Randomly flip images horizontally
  #  fill_mode='nearest'  # Fill in newly created pixels which can appear after a rotation or a width/height shift
#)

# Define the ImageDataGenerator for validation and test sets (only rescaling)
#test_datagen = ImageDataGenerator(rescale=1./255)


**Visualization of Augmentation**

In [None]:
# Helper for eyeballing a handful of images side by side
def plot_images(images_arr):
    """Draw up to five images next to each other in a single row.

    A 1x5 grid of axes is created; the images from ``images_arr`` are drawn
    into the axes from left to right with the axis decorations switched off.
    Items beyond the fifth are ignored.

    Args:
        images_arr: Iterable of images in a form accepted by ``Axes.imshow``.
    """
    _, axis_grid = plt.subplots(1, 5, figsize=(20, 20))
    panels = axis_grid.ravel()
    for picture, panel in zip(images_arr, panels):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()

# Build an augmenting generator just for this preview. The original cell read
# `train_generator`, which is only defined in commented-out code, so it raised
# a NameError. The settings mirror the commented-out augmentation configuration.
preview_datagen = ImageDataGenerator(
    rescale=1./255,          # scale pixel values to [0, 1] so imshow renders floats correctly
    rotation_range=40,       # random rotations (degrees)
    width_shift_range=0.2,   # random horizontal shifts
    height_shift_range=0.2,  # random vertical shifts
    shear_range=0.2,         # random shearing
    zoom_range=0.2,          # random zoom
    horizontal_flip=True,    # random horizontal flips
    fill_mode='nearest',     # fill pixels exposed by a transform
)
preview_generator = preview_datagen.flow(X_train, y_train, batch_size=32)

# Draw one augmented batch. A dedicated name is used so the full `images`
# dataset array loaded earlier is not overwritten by a single batch.
augmented_batch, _ = next(preview_generator)
plot_images(augmented_batch[:5])  # Plot the first 5 images

# **Data Modeling**

**Create 1st Model**





In [None]:
# First CNN: three conv/pool stages followed by a dense classifier head.
# The feature extractor starts with the layer that fixes the 128x128 RGB input.
cnn1_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(2, 2),
]
for n_filters in (64, 128):
    cnn1_layers += [
        Conv2D(n_filters, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
    ]

# Classifier head ending in a 15-way softmax (one unit per class)
cnn1_layers += [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='softmax'),
]
model_cnn = Sequential(cnn1_layers)

# Compile with Adam; categorical cross-entropy expects one-hot encoded labels
cnn1_optimizer = Adam(learning_rate=0.001)
model_cnn.compile(
    optimizer=cnn1_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer overview
model_cnn.summary()




Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_55 (Conv2D)          (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d_43 (MaxPooli  (None, 63, 63, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_56 (Conv2D)          (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_44 (MaxPooli  (None, 30, 30, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_57 (Conv2D)          (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_45 (MaxPooli  (None, 14, 14, 128)     

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels of all three splits (15 classes each)
y_train_one_hot, y_valid_one_hot, y_test_one_hot = (
    to_categorical(split_labels, num_classes=15)
    for split_labels in (y_train, y_valid, y_test)
)


In [None]:
# Fit the first CNN on the in-memory training split and validate after each epoch
training_options = {
    'batch_size': 32,  # choose the batch size according to the available resources
    'epochs': 10,  # number of epochs
    'validation_data': (X_valid, y_valid_one_hot),
}
history = model_cnn.fit(X_train, y_train_one_hot, **training_options)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Persist the trained first model to Google Drive as an .h5 file
model_cnn_save_path = '/content/drive/My Drive/model_cnn.h5'
model_cnn.save(model_cnn_save_path)

**Create 2nd Model**

A deeper model with different filter sizes and more layers.

In [None]:
# Second CNN: four stages of two 'same'-padded 3x3 convolutions plus pooling,
# with the filter count doubling from stage to stage.
# NOTE(review): this model expects 256x256 RGB input, while the loader in this
# notebook resizes images to 128x128 - confirm the data fed to it matches.
cnn2_layers = []
for n_filters in (64, 128, 256, 512):
    # Only the very first layer declares the input shape
    first_layer_kwargs = {} if cnn2_layers else {'input_shape': (256, 256, 3)}
    cnn2_layers.extend([
        Conv2D(n_filters, (3, 3), activation='relu', padding='same', **first_layer_kwargs),
        Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(2, 2),
    ])

# Dense classifier head ending in a 15-way softmax
cnn2_layers.extend([
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='softmax'),
])
model_cnn2 = Sequential(cnn2_layers)

# Compile with SGD + momentum instead of Adam
cnn2_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model_cnn2.compile(
    optimizer=cnn2_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer overview
model_cnn2.summary()


**Create 3rd Model**

Different activation functions and dropout rates.

In [None]:
# Third CNN: four conv stages with LeakyReLU activations and a two-layer dense
# head using a lower dropout rate.
# NOTE(review): this model expects 256x256 RGB input, while the loader in this
# notebook resizes images to 128x128 - confirm the data fed to it matches.
model_cnn3 = Sequential()
for stage, n_filters in enumerate((32, 64, 128, 256)):
    if stage == 0:
        # Only the very first layer declares the input shape
        model_cnn3.add(Conv2D(n_filters, (3, 3), input_shape=(256, 256, 3)))
    else:
        model_cnn3.add(Conv2D(n_filters, (3, 3)))
    model_cnn3.add(LeakyReLU(alpha=0.1))
    model_cnn3.add(MaxPooling2D(2, 2))

model_cnn3.add(Flatten())
for n_units in (512, 256):
    model_cnn3.add(Dense(n_units, activation='relu'))
    model_cnn3.add(Dropout(0.3))
model_cnn3.add(Dense(15, activation='softmax'))

# Compile with Adam; categorical cross-entropy expects one-hot encoded labels
cnn3_optimizer = Adam(learning_rate=0.001)
model_cnn3.compile(
    optimizer=cnn3_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer overview
model_cnn3.summary()
