In [95]:
import kagglehub
path = kagglehub.dataset_download("emmarex/plantdisease")
print(path)

Using Colab cache for faster access to the 'plantdisease' dataset.
/kaggle/input/plantdisease


In [96]:
import os
print(path)

/kaggle/input/plantdisease


In [97]:
dataset_path = "/kaggle/input/plantdisease/PlantVillage"

In [98]:
classes = os.listdir(dataset_path)

In [99]:
print("Total folders (classes):", len(classes))
print("Class names:")
for cls in classes:
    print(cls)

Total folders (classes): 15
Class names:
Pepper__bell___Bacterial_spot
Potato___healthy
Tomato_Leaf_Mold
Tomato__Tomato_YellowLeaf__Curl_Virus
Tomato_Bacterial_spot
Tomato_Septoria_leaf_spot
Tomato_healthy
Tomato_Spider_mites_Two_spotted_spider_mite
Tomato_Early_blight
Tomato__Target_Spot
Pepper__bell___healthy
Potato___Late_blight
Tomato_Late_blight
Potato___Early_blight
Tomato__Tomato_mosaic_virus


In [100]:
print("Total Number of Classes:", len(classes))
total_images = 0

for cls in classes:
    class_path = os.path.join(dataset_path, cls)
    images = os.listdir(class_path)
    total_images += len(images)

print("Total number of images in dataset:", total_images)


Total Number of Classes: 15
Total number of images in dataset: 20639


In [101]:
class_distribution = {}

for cls in classes:
    class_path = os.path.join(dataset_path, cls)
    class_distribution[cls] = len(os.listdir(class_path))

# Print class-wise count
for cls, count in class_distribution.items():
    print(f"{cls}: {count} images")


Pepper__bell___Bacterial_spot: 997 images
Potato___healthy: 152 images
Tomato_Leaf_Mold: 952 images
Tomato__Tomato_YellowLeaf__Curl_Virus: 3209 images
Tomato_Bacterial_spot: 2127 images
Tomato_Septoria_leaf_spot: 1771 images
Tomato_healthy: 1591 images
Tomato_Spider_mites_Two_spotted_spider_mite: 1676 images
Tomato_Early_blight: 1000 images
Tomato__Target_Spot: 1404 images
Pepper__bell___healthy: 1478 images
Potato___Late_blight: 1000 images
Tomato_Late_blight: 1909 images
Potato___Early_blight: 1000 images
Tomato__Tomato_mosaic_virus: 373 images


In [94]:
print(classes)

['Pepper__bell___Bacterial_spot', 'Potato___healthy', 'Tomato_Leaf_Mold', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato_Bacterial_spot', 'Tomato_Septoria_leaf_spot', 'Tomato_healthy', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato_Early_blight', 'Tomato__Target_Spot', 'Pepper__bell___healthy', 'Potato___Late_blight', 'Tomato_Late_blight', 'Potato___Early_blight', 'Tomato__Tomato_mosaic_virus']


In [112]:
import tensorflow as tf

In [119]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_size =(224,224)
batch_size = 32
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range =20,
    zoom_range = 0.2,
    horizontal_flip =True,
    validation_split =0.2
)
train_data = train_datagen.flow_from_directory(dataset_path,
target_size = img_size,
 batch_size = batch_size,
      class_mode = "categorical",
      subset ="training"
      )
val_data =  train_datagen.flow_from_directory(
    dataset_path,
    target_size = img_size,
    batch_size = batch_size,
    class_mode ="categorical",
    subset = "validation"
)

Found 16516 images belonging to 15 classes.
Found 4122 images belonging to 15 classes.


In [None]:
import matplotlib.pyplot as plt

images, labels = next(train_data)

plt.figure(figsize=(10,10))
for i in range(9):
    plt.subplot(3,3,i+1)
    plt.imshow(images[i])
    plt.axis("off")
plt.suptitle("Sample Images from Plant Disease Dataset")
plt.show()


In [120]:
print(train_data.class_indices)


{'Pepper__bell___Bacterial_spot': 0, 'Pepper__bell___healthy': 1, 'Potato___Early_blight': 2, 'Potato___Late_blight': 3, 'Potato___healthy': 4, 'Tomato_Bacterial_spot': 5, 'Tomato_Early_blight': 6, 'Tomato_Late_blight': 7, 'Tomato_Leaf_Mold': 8, 'Tomato_Septoria_leaf_spot': 9, 'Tomato_Spider_mites_Two_spotted_spider_mite': 10, 'Tomato__Target_Spot': 11, 'Tomato__Tomato_YellowLeaf__Curl_Virus': 12, 'Tomato__Tomato_mosaic_virus': 13, 'Tomato_healthy': 14}


In [121]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(224,224,3)),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(train_data.num_classes, activation='softmax')
])

model.summary()


In [122]:
model.summary()

In [133]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=1
)

[1m249/517[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m17:31[0m 4s/step - accuracy: 0.5842 - loss: 1.3108

In [None]:
import matplotlib.pyplot as plt

# Accuracy graph
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title('Model Accuracy')
plt.show()

# Loss graph
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title('Model Loss')
plt.show()


In [None]:
test_datagen = ImageDataGenerator(rescale=1./255)

test_data = test_datagen.flow_from_directory(
    dataset_path,
    target_size=(224,224),
    batch_size=25,
    class_mode='categorical',
    shuffle=False
)

test_loss, test_accuracy = model.evaluate(test_data)
print("Test Accuracy ", test_accuracy * 100)


In [None]:
val_loss, val_accuracy = model.evaluate(val_data)
print("Validation Accuracy:", val_accuracy)
print("Validation Loss:", val_loss)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Get true labels
true_classes = test_data.classes

# Predict classes
pred_probs = model.predict(test_data)
pred_classes = np.argmax(pred_probs, axis=1)

# Confusion matrix
cm = confusion_matrix(true_classes, pred_classes)

# Plot
plt.figure(figsize=(10,8))
sns.heatmap(cm, cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
