# Importing the Libraries 


In [6]:
import zipfile
import os

# Define relative paths
zip_file_path = os.path.join('data', 'new-plant-diseases-dataset.zip')
extract_to_path = os.path.join('data', 'extracted')

# Ensure the extract directory exists
os.makedirs(extract_to_path, exist_ok=True)

# Extract ZIP file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to_path)

print(f"Files extracted to: {extract_to_path}")



FileNotFoundError: [Errno 2] No such file or directory: 'data\\new-plant-diseases-dataset.zip'

In [5]:
import tensorflow as tf 
import matplotlib.pyplot as plt 
import pandas as pd 
import seaborn as sns 

ModuleNotFoundError: No module named 'tensorflow'

Data Preprocessing 


In [3]:


# Path to training dataset (relative to the script's location)
base_dir = os.path.dirname(os.path.abspath(__file__))  # Get the directory of the current script
train_data_path = os.path.join(base_dir, 'data', 'New Plant Diseases Dataset(Augmented)', 'train')

# Load the dataset
training_set = tf.keras.utils.image_dataset_from_directory(
    directory=train_data_path,
    labels="inferred",              # Automatically infer labels from directory structure
    label_mode="categorical",       # Use categorical labels for multi-class classification
    batch_size=32,                  # Specify batch size
    image_size=(128, 128),          # Resize images to 128x128
    shuffle=True,                   # Shuffle the data for training
    color_mode="rgb",               # Load images in RGB color mode
    interpolation="bilinear",       # Interpolation method for resizing
    verbose=True                    # Display loading progress
)



Found 70295 files belonging to 38 classes.


Validation Image Preprocessing


In [4]:
import tensorflow as tf
import os

# Path to validation dataset (relative to the script's location)
base_dir = os.path.dirname(os.path.abspath(__file__))  # Get the directory of the current script
valid_data_path = os.path.join(base_dir, 'data', 'New Plant Diseases Dataset(Augmented)', 'valid')

# Load the validation dataset
validation_set = tf.keras.utils.image_dataset_from_directory(
    directory=valid_data_path,
    labels="inferred",              # Automatically infer labels based on directory structure
    label_mode="categorical",       # Categorical labels for multi-class validation
    batch_size=32,                  # Define batch size for validation
    image_size=(128, 128),          # Resize images to 128x128
    shuffle=True,                   # Shuffle for varied validation batches
    color_mode="rgb",               # Load images in RGB format
    interpolation="bilinear",       # Method for resizing images
    verbose=True                    # Show progress while loading
)


Found 17572 files belonging to 38 classes.


In [5]:
training_set

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 38), dtype=tf.float32, name=None))>

In [6]:
# Iterate through the training set
for images, labels in training_set:
    print("Images batch:", images)
    print("Images batch shape:", images.shape)
    print("Labels batch:", labels)
    print("Labels batch shape:", labels.shape)
    break  # Exit after the first batch




Images batch: tf.Tensor(
[[[[190.5  185.5  182.5 ]
   [190.25 185.25 182.25]
   [190.25 185.25 182.25]
   ...
   [205.   203.   206.  ]
   [205.75 203.75 206.75]
   [202.   200.   203.  ]]

  [[188.25 183.25 180.25]
   [193.25 188.25 185.25]
   [193.   188.   185.  ]
   ...
   [205.25 203.25 206.25]
   [206.   204.   207.  ]
   [203.5  201.5  204.5 ]]

  [[193.   188.   185.  ]
   [190.25 185.25 182.25]
   [189.   184.   181.  ]
   ...
   [207.   205.   208.  ]
   [208.   206.   209.  ]
   [207.   205.   208.  ]]

  ...

  [[ 86.25  94.25 105.25]
   [ 81.75  88.    99.5 ]
   [ 95.5   99.25 109.75]
   ...
   [200.25 190.25 188.25]
   [184.75 174.75 172.75]
   [188.5  178.5  176.5 ]]

  [[ 87.25  94.25 104.25]
   [ 87.5   94.5  104.5 ]
   [ 92.5   99.5  109.5 ]
   ...
   [192.25 182.25 180.25]
   [193.5  183.5  181.5 ]
   [189.75 179.75 177.75]]

  [[ 84.    88.    99.  ]
   [ 87.25  91.25 102.25]
   [ 85.75  92.75 102.75]
   ...
   [206.   196.   194.  ]
   [200.   190.   188.  ]
   [20

Building the Model

In [7]:
# Import necessary modules for building the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, 
    Conv2D, 
    MaxPooling2D, 
    Flatten, 
    Dropout
)

In [9]:
model = Sequential()

Building Convolution Layer

In [10]:
# Add the first convolutional layer
model.add(Conv2D(
    filters=32,                 # Number of filters for the convolution
    kernel_size=3,              # Size of the convolution kernel
    padding='same',             # Preserve spatial dimensions of the input
    activation='relu',          # Use ReLU activation function
    input_shape=(128, 128, 3)   # Input shape: 128x128 RGB images
))

# Add the second convolutional layer
model.add(Conv2D(
    filters=32,
    kernel_size=3,
    padding='same',
    activation='relu'
))

# Add a max-pooling layer
model.add(MaxPooling2D(
    pool_size=2,    # Downsample using a 2x2 pool size
    strides=2       # Stride of 2 for the pooling operation
))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Add the third convolutional layer
model.add(Conv2D(
    filters=64,                 # Increased number of filters for deeper feature extraction
    kernel_size=3,              # Size of the convolution kernel
    padding='same',             # Preserve spatial dimensions of the input
    activation='relu',          # Use ReLU activation function
    input_shape=(128, 128, 3)   # Input shape (only required for the first layer in the model)
))

# Add the fourth convolutional layer
model.add(Conv2D(
    filters=64,
    kernel_size=3,
    padding='same',
    activation='relu'
))

# Add another max-pooling layer
model.add(MaxPooling2D(
    pool_size=2,    # Downsample using a 2x2 pool size
    strides=2       # Stride of 2 for the pooling operation
))


In [12]:
# Add the fifth convolutional layer
model.add(Conv2D(
    filters=128,                # Increased filters for capturing more complex features
    kernel_size=3,              # Kernel size remains 3x3 for consistency
    padding='same',             # Maintain spatial dimensions
    activation='relu',          # ReLU activation for non-linearity
    input_shape=(128, 128, 3)   # Input shape (only necessary for the first layer)
))

# Add the sixth convolutional layer
model.add(Conv2D(
    filters=128,
    kernel_size=3,
    padding='same',
    activation='relu'
))

# Add another max-pooling layer
model.add(MaxPooling2D(
    pool_size=2,    # Pool size of 2x2 for downsampling
    strides=2       # Stride of 2 for pooling
))


In [13]:
# Add the seventh convolutional layer
model.add(Conv2D(
    filters=256,                # Increased filters for capturing more intricate features
    kernel_size=3,              # Consistent kernel size of 3x3
    padding='same',             # Preserve spatial dimensions
    activation='relu',          # ReLU activation for non-linearity
    input_shape=(128, 128, 3)   # Input shape (only required for the first layer)
))

# Add the eighth convolutional layer
model.add(Conv2D(
    filters=256,
    kernel_size=3,
    padding='same',
    activation='relu'
))

# Add a max-pooling layer
model.add(MaxPooling2D(
    pool_size=2,    # Use a pool size of 2x2 for downsampling
    strides=2       # Stride of 2 for pooling operation
))


In [15]:
# Add the ninth convolutional layer
model.add(Conv2D(
    filters=512,                # Increased filters for highly detailed feature extraction
    kernel_size=3,              # Consistent 3x3 kernel size
    padding='same',             # Maintain input dimensions
    activation='relu',          # ReLU activation for non-linearity
    input_shape=(128, 128, 3)   # Input shape (only necessary for the first layer)
))

# Add the tenth convolutional layer
model.add(Conv2D(
    filters=512,
    kernel_size=3,
    padding='same',
    activation='relu'
))

# Add another max-pooling layer
model.add(MaxPooling2D(
    pool_size=2,    # Pool size of 2x2 for downsampling
    strides=2       # Stride of 2 for pooling
))


In [16]:
model.add(Dropout(0.25)) 

In [17]:
model.add(Flatten())

In [18]:
model.add(Dense(units=1500, activation='relu')) 

In [19]:
model.add(Dropout(0.4))

In [20]:
model.add(Dense(units=38, activation='softmax')) 

Model Compile

In [21]:
# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Adam optimizer with a low learning rate
    loss='categorical_crossentropy',                           # Loss function for multi-class classification
    metrics=['accuracy']                                       # Metric to monitor model accuracy during training
)


In [22]:
model.summary()

Model Training 

In [None]:
# Train the model
training_history = model.fit(
    x=training_set,             # Training dataset
    validation_data=validation_set,  # Validation dataset for performance monitoring
    epochs=10                   # Number of training epochs
)


Epoch 1/10
[1m 506/2197[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m37:45[0m 1s/step - accuracy: 0.0903 - loss: 3.3946

Model Evaluation 

In [None]:
# Evaluate the model on the training dataset
train_loss, train_acc = model.evaluate(
    training_set  # Training dataset used for evaluation
)

# Print the training loss and accuracy
print(f"Training Loss: {train_loss:.4f}")
print(f"Training Accuracy: {train_acc:.4f}")


In [None]:
print(train_loss, train_acc)

In [None]:
#Model on Validation set
val_loss, val_acc = model.evaluate(validation_set)
print(val_loss, val_acc)

Saving the Model

In [None]:
model.save("trained_model.keras")

In [None]:
training_history.history

In [None]:
import json

# Save training history to a JSON file
with open("training_history.json", "w") as file:
    json.dump(training_history.history, file)  # Serialize the training history dictionary to JSON

print("Training history saved to training_history.json")


Visualization

In [None]:
import matplotlib.pyplot as plt

# Define the number of epochs
epochs = [i for i in range(1, 11)]

# Plot training accuracy
plt.plot(epochs, training_history.history['accuracy'], color='red', label='Training Accuracy')

# Add labels, title, and legend
plt.xlabel("Number of Epochs")                  # Label for x-axis
plt.ylabel("Training Accuracy")                # Label for y-axis
plt.title("Visualization of Training Data Accuracy")  # Title for the plot
plt.legend()                                   # Show the legend

# Display the plot
plt.show()


In [None]:
# Define the number of epochs
epochs = [i for i in range(1, 11)]

# Plot validation accuracy
plt.plot(epochs, training_history.history['val_accuracy'], color='blue', label='Validation Accuracy')

# Add labels, title, and legend
plt.xlabel("Number of Epochs")                    # Label for x-axis
plt.ylabel("Validation Accuracy")                # Label for y-axis
plt.title("Visualization of Validation Data Accuracy")  # Title for the plot
plt.legend()                                     # Display the legend

# Display the plot
plt.show()

Model evaluation


In [None]:
# Get the class names from the validation set
class_names = validation_set.class_names

# Display the class names
class_names


In [None]:
# Path to the test dataset (relative to the script's location)
base_dir = os.path.dirname(os.path.abspath(__file__))  # Get the directory of the current script
test_data_path = os.path.join(base_dir, 'data', 'New Plant Diseases Dataset(Augmented)', 'valid')

# Load the test dataset
test_set = tf.keras.utils.image_dataset_from_directory(
    directory=test_data_path,      # Directory containing the test dataset
    labels="inferred",            # Automatically infer labels from the directory structure
    label_mode="categorical",     # Use categorical labels for multi-class classification
    batch_size=32,                # Batch size for loading the dataset
    image_size=(128, 128),        # Resize images to 128x128
    shuffle=False,                # Disable shuffling for consistent testing
    color_mode="rgb",             # Load images in RGB format
    interpolation="bilinear",     # Interpolation method for resizing
    verbose=True                  # Show detailed progress information
)

# Verify that the test dataset is loaded correctly
print(f"Test dataset loaded with {len(test_set)} batches.")



In [None]:
# Make predictions on the test set
y_pred = model.predict(test_set)

# Display the predictions and their shape
print("Predictions:", y_pred)
print("Predictions Shape:", y_pred.shape)


In [None]:
predicted_categories = tf.argmax(y_pred, axis=1)
predicted_categories
true_categories = tf.concat([y for x,y in test_set], axis = 0)
print(true_categories)

In [None]:
# Convert one-hot encoded labels to class indices
Y_true = tf.argmax(true_categories, axis=1)

# Display the class indices
print("True Class Labels:", Y_true.numpy())


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(Y_true, predicted_categories, target_names=class_name))

Visualization of Confusion Matrix

In [None]:
cm = confusion_matrix(Y_true, predicted_categories)
cm.shape

In [None]:
# Plot the confusion matrix
plt.figure(figsize=(40, 40))  # Set the figure size for better visibility
sns.heatmap(
    cm,                         # Confusion matrix data
    annot=True,                 # Annotate cells with their values
    annot_kws={'size': 10},     # Set the font size of annotations
    cmap='viridis',             # Use a color map for better contrast (optional)
    fmt='g'                     # Display annotations as integers
)

# Add axis labels and title with larger fonts
plt.xlabel("Predicted Class", fontsize=20)
plt.ylabel("Actual Class", fontsize=20)
plt.title("Plant Disease Prediction Confusion Matrix", fontsize=25)

# Show the plot
plt.show()