# Crop Disease Predictor Model Training

In [2]:
# Importing the Necessary libraries for training model on image data using Keras.

import keras                                                 # high-level API for building and training deep learning models
from keras.preprocessing.image import ImageDataGenerator     # keras.preprocessing.image: provides image preprocessing utilities, 
                                                             # ImageDataGenerator: loading and augmenting image data on-the-fly
from keras.optimizers import Adam                            # keras.optimizers: provides a set of optimization algorithms, 
                                                             # Adam: an optimization algorithm that uses adaptive learning rates
from keras.callbacks import ModelCheckpoint                  # keras.callbacks: provides a set of callback functions, 
                                                             # ModelCheckpoint: a callback that saves the model's weights during training if the validation loss improves

import matplotlib.pyplot as plt                              # matplotlib.pyplot: plotting library for generating visualizations 

In [3]:
# Locations of the training and validation image folders.
# Both splits live under one dataset root, so the root is written once and the split name is appended.
# NOTE: these are machine-specific absolute paths; adjust dataset_root when running elsewhere.
dataset_root = r"E:\SAP\Innovation Marathon\Project\train_test_val_data"

train_data_path = "\\".join((dataset_root, "train"))
validation_data_path = "\\".join((dataset_root, "val"))

In [4]:
# Define a function to plot a set of images i.e. show augmented images
def plotImages(images_arr, figsize=(20, 20)):
    """Display a collection of images side by side in a single row.

    Parameters
    ----------
    images_arr : iterable
        Images to draw; each item is passed unchanged to ``Axes.imshow``.
        One subplot is created per image, so any number of images is shown
        (not only five).
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots``. Defaults to ``(20, 20)``.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. Nothing is drawn when
        ``images_arr`` is empty.
    """
    # Materialize once so generators work and the number of columns is known.
    images = list(images_arr)
    if not images:
        # A grid with zero columns cannot be created; there is nothing to show.
        return

    # squeeze=False keeps `axes` a 2-D array even for a single image,
    # so flatten() is always available.
    _, axes = plt.subplots(1, len(images), figsize=figsize, squeeze=False)
    for img, ax in zip(images, axes.flatten()):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()

In [5]:
# Augmentation pipeline for the training images: pixel values are rescaled by 1/255 and each image is
# randomly rotated, shifted, sheared, zoomed and horizontally flipped when it is drawn
training_datagen = ImageDataGenerator(rescale=1./255,
                                      rotation_range=40,
                                      width_shift_range=0.2,
                                      height_shift_range=0.2,
                                      shear_range=0.2,
                                      zoom_range=0.2,
                                      horizontal_flip=True,
                                      fill_mode='nearest')

# Generator that will read pictures found in train_data_path, and indefinitely generate batches of augmented image data
training_data = training_datagen.flow_from_directory(train_data_path, # this is the target directory
                                      target_size=(150, 150), # all images will be resized to 150x150
                                      batch_size=32,
                                      class_mode='sparse')  # integer class labels: the dataset has more than two classes and the
                                                            # model is compiled with sparse_categorical_crossentropy

Found 1951 images belonging to 4 classes.


In [6]:
# Show the mapping from each class subdirectory name found in train_data_path to the integer label
# the generator assigns to it (these integers are the targets the model is trained on)

training_data.class_indices

{'diseased cotton leaf': 0,
 'diseased cotton plant': 1,
 'fresh cotton leaf': 2,
 'fresh cotton plant': 3}

In [None]:
# Validation images are only rescaled by 1/255 (no augmentation), so evaluation sees the unmodified pictures
valid_datagen = ImageDataGenerator(rescale=1./255)

# Generator that reads images from validation_data_path and generates batches of validation image data.
valid_data = valid_datagen.flow_from_directory(validation_data_path,
                                  target_size=(150,150),  # same input size as the training generator
                                  batch_size=32,
                                  class_mode='sparse')    # integer class labels, matching the
                                                          # sparse_categorical_crossentropy loss

In [None]:
# Generate 5 augmented sample images from training_data and hand them to plotImages for display.
# Every access to training_data[0] builds the first batch again through the augmentation pipeline, so each
# draw takes the first image of a freshly generated batch.
# NOTE(review): this is expected to be the same source picture under five different random transforms,
# provided the generator does not reshuffle its order between accesses — confirm.

images = []
for draw in range(5):
    first_batch_images = training_data[0][0]   # element 0 of the (images, labels) batch
    images.append(first_batch_images[0])       # first image of that batch
plotImages(images)

In [None]:
# Checkpoint callback: monitors val_accuracy and, because save_best_only=True with mode='max',
# writes the model to model_path only when that metric reaches a new maximum
model_path = r'E:\SAP\Innovation Marathon\Project\Trained Model\cotton_plant_disease_pred_model.h5'
checkpoint = ModelCheckpoint(
    model_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Callbacks handed to fit(); currently only the checkpoint
callbacks_list = [checkpoint]

In [None]:
# Building CNN model using the Keras Sequential API

# Conv2D layers apply a set of learnable filters to the input image, resulting in a feature map
# MaxPooling2D layers perform downsampling on the output feature maps
# Dropout layers randomly drop out some of the neurons during training
# Flatten layer flattens the output feature maps of the last convolutional layer into a 1D vector
# Dense layers are fully connected layers that perform classification based on the features extracted by the convolutional layers
#
# NOTE(review): the Conv2D layers are created without an `activation` argument, so they apply no
# non-linearity of their own (max-pooling is the only non-linear step in the feature extractor).
# Confirm this is intentional; adding activation='relu' is the usual choice.

cnn_model = keras.models.Sequential([
                                    # Feature extractor: four Conv2D + MaxPooling2D stages on 150x150 RGB input
                                    keras.layers.Conv2D(filters=32, kernel_size=3, input_shape=[150, 150, 3]),
                                    keras.layers.MaxPooling2D(pool_size=(2,2)),
                                    keras.layers.Conv2D(filters=64, kernel_size=3),
                                    keras.layers.MaxPooling2D(pool_size=(2,2)),
                                    keras.layers.Conv2D(filters=128, kernel_size=3),
                                    keras.layers.MaxPooling2D(pool_size=(2,2)),
                                    keras.layers.Conv2D(filters=256, kernel_size=3),
                                    keras.layers.MaxPooling2D(pool_size=(2,2)),

                                    # Classifier head
                                    keras.layers.Dropout(0.5),
                                    keras.layers.Flatten(), # turn the feature maps into a 1D vector for the dense layers
                                    keras.layers.Dense(units=128, activation='relu'), # first hidden dense layer
                                    keras.layers.Dropout(0.1),
                                    keras.layers.Dense(units=256, activation='relu'), # second hidden dense layer
                                    keras.layers.Dropout(0.25),
                                    keras.layers.Dense(units=4, activation='softmax') # output layer: one probability per class
])


# Configure the learning process of the model
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated and rejected by newer Keras releases.
# sparse_categorical_crossentropy expects integer class labels, which is what the data generators yield.
cnn_model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Print the summary of cnn_model's architecture so the layer stack defined above can be checked before training

cnn_model.summary()

In [None]:
# Fit cnn_model on the batches produced by training_data for 500 epochs, evaluating on valid_data.
# The callbacks in callbacks_list run during training (the checkpoint keeps the model with the best
# validation accuracy on disk). The returned History object is kept for the plots further down.

history = cnn_model.fit(
    training_data,
    validation_data=valid_data,
    epochs=500,
    callbacks=callbacks_list,
    verbose=1,
)

In [None]:
# Save the best trained model
#
# After fit() returns, cnn_model holds the weights of the final epoch, which is not necessarily the epoch
# with the best validation accuracy. The ModelCheckpoint callback (save_best_only=True, monitoring
# val_accuracy) wrote the best-scoring model to model_path, so that checkpoint is reloaded and stored
# under the "Best Model" path instead of the last-epoch state.

best_model_path = r'E:\SAP\Innovation Marathon\Project\Trained Model\Best Model\cotton_plant_disease_pred_best_model.h5'
best_model = keras.models.load_model(model_path)
best_model.save(best_model_path)

In [None]:
# Visualize the training and validation performance of the model over each epoch

# One figure per metric: the training curve and its validation counterpart ('val_' + metric).
# The second curve comes from the validation generator, so it is labelled 'validation' rather than 'test'.
for metric in ('accuracy', 'loss'):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    ax.plot(history.history['val_' + metric], label='validation')
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    plt.show()

In [None]:
# Display the raw per-epoch values recorded during training; the plotting cell reads the
# 'accuracy', 'val_accuracy', 'loss' and 'val_loss' entries of this dictionary

history.history