# Part 1 - Importing the Dataset

In [None]:
# Importing necessary libraries from TensorFlow
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import numpy as np

In [None]:
# Step 1: Make Google Drive visible to this Colab runtime.
# The Kaggle API key (kaggle.json) is copied out of the mounted drive in the next step.

from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Step 2: Configure Kaggle API access by copying kaggle.json from Google Drive

!mkdir ~/.kaggle/                             # Create a Kaggle folder
!cp "/content/drive/MyDrive/kaggle.json" ~/.kaggle/kaggle.json  # Replace with your path to kaggle.json
!chmod 600 ~/.kaggle/kaggle.json               # Secure the permissions for the file

# Step 3: Download the dataset from Kaggle using Kaggle API

!kaggle datasets download vipoooool/new-plant-diseases-dataset

Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
License(s): copyright-authors
Downloading new-plant-diseases-dataset.zip to /content
100% 2.70G/2.70G [00:20<00:00, 202MB/s]
100% 2.70G/2.70G [00:20<00:00, 139MB/s]


In [None]:
# Step 4: Unzip the dataset

from zipfile import ZipFile
zippedFile = "/content/new-plant-diseases-dataset.zip"  # The path of the downloaded zip file

# Extract every member into /content. The `with` statement closes the archive
# when the block exits, so no explicit close() call is needed.
with ZipFile(zippedFile, 'r') as zip_ref:
    zip_ref.extractall('/content')

# Step 5: Define the paths for train and validation directories after extraction

# The archive nests the dataset folder inside a folder of the same name.
dataset_root = '/content/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)'
train_dir = dataset_root + '/train'
valid_dir = dataset_root + '/valid'

# Part 2 - Preprocessing

In [None]:
# Step 6: Data Preprocessing using ImageDataGenerator

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Options shared by the training and validation iterators: images are resized
# to 64x64, served in batches of 32, and labelled with one-hot vectors.
flow_options = dict(target_size=(64, 64), batch_size=32, class_mode='categorical')

# Training images are rescaled to [0, 1] and randomly sheared, zoomed and flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images are only rescaled; no augmentation is applied.
valid_datagen = ImageDataGenerator(rescale=1./255)

# Build the directory iterators (training first, then validation).
train_set = train_datagen.flow_from_directory(train_dir, **flow_options)
valid_set = valid_datagen.flow_from_directory(valid_dir, **flow_options)

Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.


# Part 3 - Model

## Building the CNN Model

In [None]:
# Step 7: Build the CNN Model

import tensorflow as tf

# One output unit per class found in the training directory (38 for this
# dataset), rather than a hard-coded count that could drift from the data.
num_classes = len(train_set.class_indices)

# The input shape is declared with an explicit Input object instead of an
# `input_shape` argument on the first Conv2D layer, which Keras warns against
# for Sequential models. (64, 64, 3) must match target_size=(64, 64) used by
# the data iterators.
cnn = tf.keras.models.Sequential([
    tf.keras.Input(shape=(64, 64, 3)),

    # Two convolution + max-pooling stages
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),

    # Classifier head: flatten, one hidden dense layer, softmax over the classes
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Training the Model

In [None]:
# Step 8: Compile the CNN

# Adam optimizer with categorical cross-entropy loss; accuracy is reported
# alongside the loss during training.
cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Step 9: Train the model

# Keep the History object returned by fit() so the per-epoch metrics stay
# available after training instead of being discarded.
history = cnn.fit(x=train_set, validation_data=valid_set, epochs=25)

Epoch 1/25


  self._warn_if_super_not_called()


[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m510s[0m 231ms/step - accuracy: 0.4802 - loss: 1.8318 - val_accuracy: 0.7290 - val_loss: 0.8866
Epoch 2/25
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m474s[0m 216ms/step - accuracy: 0.7992 - loss: 0.6331 - val_accuracy: 0.7586 - val_loss: 0.7931
Epoch 3/25
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m469s[0m 213ms/step - accuracy: 0.8496 - loss: 0.4648 - val_accuracy: 0.8215 - val_loss: 0.5921
Epoch 4/25
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m459s[0m 209ms/step - accuracy: 0.8755 - loss: 0.3850 - val_accuracy: 0.8553 - val_loss: 0.4564
Epoch 5/25
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m513s[0m 214ms/step - accuracy: 0.8945 - loss: 0.3243 - val_accuracy: 0.8729 - val_loss: 0.4099
Epoch 6/25
[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m464s[0m 211ms/step - accuracy: 0.9069 - loss: 0.2879 - val_accuracy: 0.8821 - val_loss: 0.3781
Epo

<keras.src.callbacks.history.History at 0x7a97d578aa10>

## Saving the model

In [None]:
# Step 10: Save the trained model

# Write the trained network to a single .h5 file.
h5_model_path = '/content/plant_disease_model.h5'
cnn.save(h5_model_path)



In [None]:
# Also write the trained network in the .keras format.
keras_model_path = '/content/plant_disease_model.keras'
cnn.save(keras_model_path)

# Part 4 - Evaluating accuracy and plotting the confusion matrix

In [None]:
import tensorflow as tf

# Reload the trained network from the .h5 file on disk.
saved_model_path = '/content/plant_disease_model.h5'
model = tf.keras.models.load_model(saved_model_path)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

# Evaluate on a class-balanced subset: at most this many validation images per class.
SAMPLES_PER_CLASS = 100

# class_indices maps class name -> integer index. Order the names by index so
# that class_names[i] is the label for index i; built once, not once per image.
class_names = sorted(valid_set.class_indices, key=valid_set.class_indices.get)
num_classes = len(class_names)

# Initialize storage for images and labels
selected_images = []
selected_labels = []
class_count = {name: 0 for name in class_names}  # images collected so far, per class

# Step 1: Collect the subset. Batches are fetched by index so every validation
# batch is visited at most once; iterating the generator object directly can
# cycle without end, which would hang this cell if some class had fewer than
# SAMPLES_PER_CLASS images.
for batch_index in range(len(valid_set)):
    images, labels = valid_set[batch_index]
    for image, label in zip(images, labels):
        class_name = class_names[np.argmax(label)]
        if class_count[class_name] < SAMPLES_PER_CLASS:
            selected_images.append(image)
            selected_labels.append(label)
            class_count[class_name] += 1

    # Stop early once every class has reached its quota
    if all(count >= SAMPLES_PER_CLASS for count in class_count.values()):
        break

# Convert selected images and labels to numpy arrays
selected_images = np.array(selected_images)
selected_labels = np.array(selected_labels)

# Step 2: Predict on the selected validation images
predictions = model.predict(selected_images)
predicted_classes = np.argmax(predictions, axis=1)

# Convert one-hot encoded true labels to integers
true_classes = np.argmax(selected_labels, axis=1)

# Step 3: Calculate accuracy
accuracy = accuracy_score(true_classes, predicted_classes) * 100
print(f'Accuracy: {accuracy:.2f}%')

# Step 4: Generate confusion matrix. Passing `labels` pins the matrix to one
# row/column per class index, so it always lines up with the tick labels even
# if a class never appears among the true or predicted values.
cm = confusion_matrix(true_classes, predicted_classes, labels=np.arange(num_classes))

# Plot the confusion matrix using Seaborn
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
