### **Data Preparation**

In [None]:
import pandas as pd
import os
import shutil

In [None]:
# Load the label table (one row per image, one 0/1 column per class) and
# print its schema so missing values or unexpected dtypes show up early.
labels_csv = '/content/drive/MyDrive/Crop Disease Recognition/Dataset/class.csv'
data = pd.read_csv(labels_csv)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1821 entries, 0 to 1820
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   image_id           1821 non-null   object
 1   healthy            1821 non-null   int64 
 2   multiple_diseases  1821 non-null   int64 
 3   rust               1821 non-null   int64 
 4   scab               1821 non-null   int64 
dtypes: int64(4), object(1)
memory usage: 71.3+ KB


In [None]:
# Preview the first ten rows to sanity-check the image ids and label columns.
data.head(10)

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0
5,Train_5,1,0,0,0
6,Train_6,0,1,0,0
7,Train_7,0,0,0,1
8,Train_8,0,0,0,1
9,Train_9,1,0,0,0


In [None]:
# Checking no. of samples for each class

# Count the rows flagged 1 in each label column. Comparing and summing
# (instead of indexing value_counts() with [1]) gives 0 rather than a
# KeyError when a class has no positive rows.
for label in ['healthy', 'multiple_diseases', 'rust', 'scab']:
    print(label + ": ", (data[label] == 1).sum())

# Total: 1821
# 	Healthy: 516
# 	Multiple: 91
# 	Rust: 622
# 	Scab: 592

# Train: 1458
# 	Healthy: 413
# 	Multiple: 73
# 	Rust: 498
# 	Scab: 474
	
# Test: 183
# 	Healthy: 52
# 	Multiple: 10
# 	Rust: 62
# 	Scab: 59

# Validation: 180
# 	Healthy: 51
# 	Multiple: 8
# 	Rust: 62
# 	Scab: 59

healthy:  516
multiple_diseases:  91
rust:  622
scab:  592


In [None]:
# Categorizing images into classes
#
# Copies every image that has a row in `data` into
# Splits/Categorized/<class>/, where <class> is the first label column set
# to 1 for that image. The cell is safe to re-run: existing directories are
# reused and files that are already in place are skipped, so an interrupted
# copy can be resumed. Real failures (missing source file, Drive I/O errors)
# are no longer swallowed and surface as exceptions.

classes = ['healthy', 'multiple_diseases', 'rust', 'scab']
data_path = '/content/drive/MyDrive/Crop Disease Recognition/Dataset/'
dest_path = '/content/drive/MyDrive/Crop Disease Recognition/Dataset/Splits/'

images_dir = os.path.join(data_path, 'Images')
categorized_dir = os.path.join(dest_path, 'Categorized')

# makedirs(..., exist_ok=True) also creates Splits/ if it is missing and
# does not fail when the class folders already exist.
for c in classes:
    os.makedirs(os.path.join(categorized_dir, c), exist_ok=True)

# Build the image_id -> class lookup once instead of scanning the whole
# DataFrame for every file. Only the label columns are inspected, so other
# columns can never be mistaken for a class.
label_by_id = {}
for row in data[['image_id'] + classes].itertuples(index=False):
    flagged = [name for name, value in zip(classes, row[1:]) if value == 1]
    if flagged:
        label_by_id[row[0]] = flagged[0]

copied = 0
skipped = 0
for img in os.listdir(images_dir):
    # splitext handles any extension length (.jpg, .jpeg, ...).
    label = label_by_id.get(os.path.splitext(img)[0])
    if label is None:
        # File has no labelled row in the CSV; leave it uncategorized.
        continue

    target = os.path.join(categorized_dir, label, img)
    if os.path.exists(target):
        skipped += 1
        continue

    shutil.copyfile(os.path.join(images_dir, img), target)
    copied += 1

print("Copied {} images, skipped {} already present.".format(copied, skipped))

### **Model Training using Transfer Learning**

In [None]:
# Using pre-trained weights of VGG16 architecture

from keras.applications import vgg16

# Input resolution fed to the network (height, width).
img_rows, img_cols = 224, 224

# include_top=False leaves out VGG16's own fully-connected classifier so a
# custom head can be attached to the convolutional base.
VGG = vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_rows, img_cols, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freezing trained layers

# Mark every layer of the VGG16 base as non-trainable so its weights are
# not updated when the model is fitted.
for frozen_layer in VGG.layers:
    frozen_layer.trainable = False

In [None]:
# Building layers in CNN

def top(bottom_model, num_classes):
    """Stack the classification head on top of ``bottom_model``.

    The base model's output is global-average-pooled, passed through five
    ReLU dense layers (1024, 512, 512, 256 and 128 units, in that order) and
    finished with a softmax dense layer of ``num_classes`` units.

    Args:
        bottom_model: model whose ``.output`` the head is attached to.
        num_classes: number of units in the final softmax layer.

    Returns:
        The result of applying the final softmax layer (suitable as the
        ``outputs`` argument of ``Model``).
    """
    head = GlobalAveragePooling2D()(bottom_model.output)
    for units in (1024, 512, 512, 256, 128):
        head = Dense(units, activation='relu')(head)
    return Dense(num_classes, activation='softmax')(head)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import BatchNormalization
from keras.models import Model

# One output unit per class: healthy, multiple_diseases, rust, scab.
num_classes = 4

# Attach the custom classification head to the VGG16 base.
FC_Head = top(VGG, num_classes)

model = Model(inputs = VGG.input, outputs = FC_Head)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [None]:
from keras.preprocessing.image import ImageDataGenerator

train_data_dir = '/content/drive/MyDrive/Crop Disease Recognition/Dataset/Splits/train'
validation_data_dir = '/content/drive/MyDrive/Crop Disease Recognition/Dataset/Splits/validation'

batch_size = 64

# Both splits get the same treatment: pixel values are multiplied by 1/255
# and no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators. Images are resized to the
# network's input resolution and labels are one-hot encoded ('categorical').
flow_options = dict(
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)

validation_generator = validation_datagen.flow_from_directory(validation_data_dir, **flow_options)

Found 1253 images belonging to 4 classes.
Found 180 images belonging to 4 classes.


In [None]:
# Training the model

from tensorflow.keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Persist the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint("crop_disease.h5",
                             monitor="val_loss",
                             mode="min",
                             save_best_only = True,
                             verbose=1)

# Stop once val_loss has not improved for 3 consecutive epochs and roll the
# model back to the best weights seen.
earlystop = EarlyStopping(monitor = 'val_loss',
                          min_delta = 0,
                          patience = 3,
                          verbose = 1,
                          restore_best_weights = True)

callbacks = [earlystop, checkpoint]

# `learning_rate` replaces the deprecated `lr` argument, which triggers a
# warning on current Keras and is rejected by newer releases.
model.compile(loss = 'categorical_crossentropy',
              optimizer = RMSprop(learning_rate = 0.001),
              metrics = ['accuracy'])

# Take the sample counts and batch size from the generators instead of
# hard-coding them, so they cannot drift from what was found on disk.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples

epochs = 10
batch_size = train_generator.batch_size

# len(generator) is the number of batches needed to cover the whole split,
# including the final partial batch. The previous `samples // batch_size`
# dropped that batch. For validation this meant only 128 of 180 images were
# scored per epoch, and because the generator shuffles, a different subset
# each time, which made the val_loss that drives early stopping and
# checkpointing noisy.
history = model.fit(
    train_generator,
    steps_per_epoch = len(train_generator),
    epochs = epochs,
    callbacks = callbacks,
    validation_data = validation_generator,
    validation_steps = len(validation_generator))

  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/10
Epoch 00001: val_loss improved from inf to 1.22396, saving model to crop_disease.h5
Epoch 2/10
Epoch 00002: val_loss improved from 1.22396 to 1.17712, saving model to crop_disease.h5
Epoch 3/10
Epoch 00003: val_loss did not improve from 1.17712
Epoch 4/10
Epoch 00004: val_loss improved from 1.17712 to 1.07006, saving model to crop_disease.h5
Epoch 5/10
Epoch 00005: val_loss did not improve from 1.07006
Epoch 6/10
Epoch 00006: val_loss improved from 1.07006 to 0.95707, saving model to crop_disease.h5
Epoch 7/10
Epoch 00007: val_loss did not improve from 0.95707
Epoch 8/10
Epoch 00008: val_loss did not improve from 0.95707
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.95707
Epoch 00009: early stopping
