Assignment 3

Implementing a Convolutional Neural Network (CNN) for the task of plant disease detection.

In [32]:
import numpy as np
import pickle
import cv2
from os import listdir
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Normalization, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Flatten, Dropout, Dense
from tensorflow.keras import backend as k
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [None]:
import os
import zipfile

import requests

# Locations used by this cell.
url = "https://github.com/spMohanty/PlantVillage-Dataset/archive/refs/heads/master.zip"
dataset_path = "data/plantvillage.zip"
extract_path = "data/PlantVillage"

# Create directory for the dataset if it doesn't exist
os.makedirs("data", exist_ok=True)

# Step 1: Download the dataset (skipped when the archive is already on disk,
# so that re-running the notebook does not fetch it again).
if os.path.exists(dataset_path):
    print("Dataset archive already present, skipping download.")
else:
    # Write to a temporary name and rename only after the transfer finished,
    # so an interrupted download never leaves a truncated zip at dataset_path.
    partial_path = dataset_path + ".part"
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving an error page as a ".zip".
        response.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    os.replace(partial_path, dataset_path)
    print("Dataset successfully downloaded!")

# Step 2: Unzip the dataset
with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Dataset successfully extracted!")

# Step 3: Check dataset structure
print("Folders in the dataset:", os.listdir(extract_path))


In [None]:
import os
import random
import shutil

# Define the dataset path
dataset_path = "data/PlantVillage/PlantVillage-Dataset-master/raw/color"
reduced_dataset_path = "PlantVillage_Final"  # New folder for reduced dataset

KEEP_FRACTION = 0.1  # Fraction of each class copied into the reduced dataset
SAMPLING_SEED = 42   # Fixed seed so the same subset is selected on every run

# Dedicated generator: reproducible sampling without touching the global
# `random` state used elsewhere.
rng = random.Random(SAMPLING_SEED)

# Create the reduced dataset directory
os.makedirs(reduced_dataset_path, exist_ok=True)

# Process each disease category (sorted: os.listdir order is arbitrary, and the
# order in which the generator is consumed determines which files are picked).
for category in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, category)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders are not categories.
        continue

    new_class_path = os.path.join(reduced_dataset_path, category)
    os.makedirs(new_class_path, exist_ok=True)

    # Get all images in the category
    images = sorted(os.listdir(class_path))
    total_images = len(images)

    # Keep only KEEP_FRACTION (10%) of the images; int() truncates, so a class
    # with fewer than 1 / KEEP_FRACTION images contributes none.
    keep_count = int(KEEP_FRACTION * total_images)

    # Randomly select images to retain
    selected_images = rng.sample(images, keep_count)

    # Copy selected images to the new dataset folder
    for img in selected_images:
        src = os.path.join(class_path, img)
        dest = os.path.join(new_class_path, img)
        shutil.copy(src, dest)

    print(f"Processed {category}: Retained {keep_count}/{total_images} images.")

print("Dataset size reduction complete! The new dataset is saved in:", reduced_dataset_path)

In [None]:
# Define dataset path
dataset_path = "PlantVillage_Final/"

# Get class labels (disease categories): one label per sub-directory.
# The list is sorted because os.listdir returns entries in arbitrary order and
# the position of a name in this list is used as its integer class index; a
# stable order keeps that mapping identical across runs and machines.
# Plain files and hidden entries (names starting with ".") are not classes.
class_labels = sorted(
    entry
    for entry in os.listdir(dataset_path)
    if not entry.startswith(".") and os.path.isdir(os.path.join(dataset_path, entry))
)
print("Classes:", class_labels)

In [40]:
# Image parameters
IMG_SIZE = 128  # Resize images to 128x128
data = []
labels = []
unreadable = []  # Paths cv2 could not decode; reported below instead of vanishing silently

# Load images and resize them. The position of a category in class_labels is
# its integer class index.
for class_index, category in enumerate(class_labels):
    class_path = os.path.join(dataset_path, category)

    # Sorted so the sample order (and therefore any seeded split of `data`)
    # does not depend on the file system's listing order.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        # NOTE(review): cv2.imread is documented to return BGR channel order;
        # images fed to the trained model later must be loaded the same way.
        img = cv2.imread(img_path)

        if img is None:
            # imread returns None for files it cannot decode.
            unreadable.append(img_path)
            continue

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))  # Resize
        data.append(img)
        labels.append(class_index)

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

# Convert to NumPy arrays. float32 halves the memory of the float64 array that
# a plain `np.array(data) / 255.0` would produce; the in-place division avoids
# a second full-size temporary.
data = np.asarray(data, dtype=np.float32)
data /= 255.0  # Normalize images to [0, 1]
labels = np.array(labels)

In [None]:
# Split into training and validation sets.
# Stratify on the labels so every class keeps the same proportion in both
# subsets. scikit-learn rejects a stratified split when a class has a single
# sample or when a subset would be smaller than the number of classes, so fall
# back to a plain shuffle in those cases.
TEST_FRACTION = 0.2
samples_per_class = np.unique(labels, return_counts=True)[1]
can_stratify = (
    samples_per_class.size > 0
    and samples_per_class.min() >= 2
    and len(labels) * TEST_FRACTION >= samples_per_class.size
)
xtrain, xtest, ytrain, ytest = train_test_split(
    data,
    labels,
    test_size=TEST_FRACTION,
    random_state=42,
    stratify=labels if can_stratify else None,
)
print(f"Training samples: {len(xtrain)}, Validation samples: {len(xtest)}")

In [42]:
# Convert labels to categorical
ytrain = to_categorical(ytrain, num_classes=len(class_labels))
ytest = to_categorical(ytest, num_classes=len(class_labels))

In [43]:
# Make sure both image arrays have the (samples, height, width, channels)
# layout the network's input layer is declared with. reshape raises if the
# element count does not match. The validation set gets the same treatment as
# the training set so a shape problem surfaces here rather than during fit().
input_shape = (IMG_SIZE, IMG_SIZE, 3)
xtrain = xtrain.reshape((xtrain.shape[0], *input_shape))
xtest = xtest.reshape((xtest.shape[0], *input_shape))

In [44]:
# Instantiate an empty Keras Sequential container that will hold the CNN layers.
model = Sequential()

In [None]:
# Build the whole network in a single expression. Constructing a fresh
# Sequential here (rather than calling model.add on an existing object) keeps
# this cell idempotent: running it again rebuilds the same model instead of
# stacking a second copy of every layer onto the previous one.
model = Sequential([
    # Three convolution + max-pooling stages.
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Prevent overfitting
    Dense(len(class_labels), activation='softmax'),  # One output per class
])

# categorical_crossentropy expects one-hot encoded targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Training hyper-parameters.
EPOCHS = 5
BATCH_SIZE = 32

# Fit on the training split and evaluate on the held-out split after every
# epoch; the returned History object records the per-epoch metrics.
history = model.fit(
    x=xtrain,
    y=ytrain,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(xtest, ytest),
)

# Save the trained model
model.save("plant_disease_model.h5")

In [None]:
# Plot per-epoch accuracy for the training and validation data on one Axes.
fig, ax = plt.subplots()

curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in curves:
    ax.plot(history.history[metric_key], label=curve_label)

ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
ax.set_title('Training vs Validation Accuracy')
plt.show()