# 1. Import Libraries:

In [1]:
base_dir = "/kaggle/input/tomatoleaf/tomato"
train_dir = "/kaggle/input/tomatoleaf/tomato/train"
validation_dir = "/kaggle/input/tomatoleaf/tomato/val"

# 2. Define Image Size and Batch Size:

In [2]:
IMAGE_SIZE = 224 # define the image size for all images (height and Width = 224 X 224)
BATCH_SIZE = 32  # At the time of training 64 images will be there at a time

# 3. Set Folder Paths:

In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import applications
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from glob import glob
from sklearn.model_selection import train_test_split

2024-05-16 15:58:16.524153: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-16 15:58:16.524236: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-16 15:58:16.651676: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# 4. Get the Number of Classes from Folder Names:

In [4]:
folders = glob(train_dir + '/*')
print(len(folders))

num_classes = len(folders)
print("Number of classes:", num_classes)

10
Number of classes: 10


# 5. Load VGG16 Model:

In [5]:
# IMAGE_SIZE = [224, 224]
vgg_model = VGG16(weights = 'imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)) 
# include_top = False (where we are defining our own class/label) means we are working at our own dataset where we will have our own label
# here we will have 10 layers (as 10 classes are there)
vgg_model.output

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


<KerasTensor shape=(None, 7, 7, 512), dtype=float32, sparse=False, name=keras_tensor_18>

# 6. Modify the output layer

In [6]:
x = Flatten()(vgg_model.output)

# Dense layers with dropout regularization
x = Dense(2048, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)

# Dense layer will add neurons of the lenth of folders 
prediction = Dense(num_classes, activation="sigmoid")(x) # here x is the output of vgg

# 7. Freeze Convolutional Layers (Optional Fine-Tuning):

In [7]:
# unfreeze layers
for layer in vgg_model.layers[-10:]:
    layer.trainable = True
# We have total 19 layers

# 8. Create a new model with the new output layer

In [8]:
model = Model(inputs=vgg_model.input, outputs=prediction)
model.summary()

# 8. Compile the Model (Initial Training):

In [9]:
rms_optimizer = RMSprop(learning_rate=0.0001, rho=0.9)  # Consider using a learning rate scheduler
model.compile(loss="categorical_crossentropy", optimizer=rms_optimizer, metrics=["accuracy"])

# 9. Data Augmentation for Training and Validation:

In [10]:
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    validation_split = 0.2  # 20% for validation
)
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split = 0.2 # 20% for validation
)
test_datagen =  ImageDataGenerator(
    rescale=1./255,
)

# 10. Prepare Data Generators:

In [11]:
train_generator = train_datagen.flow_from_directory(
    train_dir, # define directory
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,  # Shuffle the data before splitting
    subset='training',  # Specify that this is for training
)

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',  # Specify that this is for training
)
test_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size = (IMAGE_SIZE, IMAGE_SIZE),
    batch_size = BATCH_SIZE,
    shuffle = False,
    class_mode = 'categorical'
)

Found 8000 images belonging to 10 classes.
Found 2000 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.


# 11. Early Stopping and Learning Rate Reduction:

In [12]:
early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=3, min_lr=0.0001)
model_checkpoint = ModelCheckpoint('tomato_vgg16_model_sigmoid_update1.weights.h5', monitor='val_loss', save_best_only=True, save_weights_only=True, verbose=1)

# 12. Train the Model (Initial):

In [13]:
epoch = 20
history = model.fit(
    train_generator, epochs=epoch, validation_data=validation_generator, callbacks=[early_stopping, reduce_lr, model_checkpoint]
)

Epoch 1/20


  self._warn_if_super_not_called()
2024-05-16 15:59:27.079340: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 0: 4.63498, expected 3.85968
2024-05-16 15:59:27.079397: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 3: 6.55784, expected 5.78254
2024-05-16 15:59:27.079406: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 4: 6.61282, expected 5.83752
2024-05-16 15:59:27.079413: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 6: 6.34499, expected 5.56968
2024-05-16 15:59:27.079421: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 7: 6.25159, expected 5.47629
2024-05-16 15:59:27.079429: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 8: 5.43964, expected 4.66434
2024-05-16 15:59:27.079437: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 9: 6.54333, expected 5.76803
2024-05-16 15:59:27.

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452ms/step - accuracy: 0.1025 - loss: 2.3421

W0000 00:00:1715875319.041013     103 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
2024-05-16 16:02:26.445675: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 0: 3.89842, expected 3.37692
2024-05-16 16:02:26.445732: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 12: 3.13412, expected 2.61262
2024-05-16 16:02:26.445741: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 13: 3.08377, expected 2.56227
2024-05-16 16:02:26.445749: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 14: 4.1021, expected 3.5806
2024-05-16 16:02:26.445757: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 29: 3.76539, expected 3.24388
2024-05-16 16:02:26.445765: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 30: 4.19841, expected 3.6769
2024-05-16 16:02:26.445773: E external/local_xla/xla/service/gp


Epoch 1: val_loss improved from inf to 2.24746, saving model to tomato_vgg16_model_sigmoid_update1.weights.h5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 612ms/step - accuracy: 0.1026 - loss: 2.3419 - val_accuracy: 0.1485 - val_loss: 2.2475 - learning_rate: 1.0000e-04
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366ms/step - accuracy: 0.1980 - loss: 2.1785
Epoch 2: val_loss improved from 2.24746 to 1.94247, saving model to tomato_vgg16_model_sigmoid_update1.weights.h5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 463ms/step - accuracy: 0.1981 - loss: 2.1783 - val_accuracy: 0.3010 - val_loss: 1.9425 - learning_rate: 1.0000e-04
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 362ms/step - accuracy: 0.3017 - loss: 1.9528
Epoch 3: val_loss improved from 1.94247 to 1.71903, saving model to tomato_vgg16_model_sigmoid_update1.weights.h5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

# 13. Evaluate the Model after Initial Training:

In [15]:
score = model.evaluate(test_generator)
print("Test loss (initial):", score[0])
print("Test accuracy (initial):", score[1])

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 65ms/step - accuracy: 0.9090 - loss: 0.3291
Test loss (initial): 0.30585116147994995
Test accuracy (initial): 0.906000018119812


# 14. Get Classification Report

In [16]:
import numpy as np
from sklearn.metrics import classification_report
class_labels = ['Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 
                'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite',
                'Tomato___Target_Spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___Tomato_mosaic_virus',
                'Tomato___healthy']

predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)
true_labels = test_generator.classes
report = classification_report(true_labels, predicted_classes, target_names=class_labels)
print(report)

[1m 4/32[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 60ms/step

W0000 00:00:1715876987.513778     104 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 96ms/step
                                               precision    recall  f1-score   support

                      Tomato___Bacterial_spot       0.94      0.94      0.94       100
                        Tomato___Early_blight       0.82      0.93      0.87       100
                         Tomato___Late_blight       0.99      0.83      0.90       100
                           Tomato___Leaf_Mold       0.99      0.97      0.98       100
                  Tomato___Septoria_leaf_spot       0.91      0.98      0.94       100
Tomato___Spider_mites Two-spotted_spider_mite       0.98      0.63      0.77       100
                         Tomato___Target_Spot       0.66      0.96      0.78       100
       Tomato___Tomato_Yellow_Leaf_Curl_Virus       1.00      0.94      0.97       100
                 Tomato___Tomato_mosaic_virus       0.99      0.94      0.96       100
                             Tomato___healthy       0.9

W0000 00:00:1715876990.484812     102 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


# 15. Update the weights of the model

In [18]:
model.load_weights('/kaggle/working/tomato_vgg16_model_sigmoid_update1.weights.h5')