# 1. Import Libraries:

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, MaxPooling2D, Conv2D, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import applications
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from glob import glob
from sklearn.model_selection import train_test_split

# 2. Define Image Size and Batch Size:

In [None]:
# Side length in pixels; every image is resized to IMAGE_SIZE x IMAGE_SIZE.
IMAGE_SIZE = 224
# Number of images the data generators yield per batch.
BATCH_SIZE = 64

# 3. Set Folder Paths:

In [None]:
# Root folder of the dataset; the split folders below sit directly under it.
base_dir = "/kaggle/input/tomatoleaf/tomato"
# Images used for training (and, via a 20% split, for validation).
train_dir = f"{base_dir}/train"
# The "val" folder; this notebook feeds it to the held-out test generator.
validation_dir = f"{base_dir}/val"

# 4. Get the Number of Classes from Folder Names:

In [None]:
# Each class is stored in its own sub-directory of train_dir. Only directories
# are counted: flow_from_directory infers classes from sub-directories, so a
# stray file in train_dir must not widen the output layer.
folders = sorted(
    path for path in glob(os.path.join(train_dir, '*')) if os.path.isdir(path)
)

# Size of the final Dense layer: one output unit per class folder.
num_classes = len(folders)
print("Number of classes:", num_classes)

# 5. Load VGG16 Model:

In [None]:
# Load the VGG16 convolutional base with ImageNet weights.
# include_top=False leaves out VGG16's own classifier layers, so a custom
# classification head (one output per dataset class) can be attached instead.
vgg_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)
# Show the base model's output tensor as the notebook cell result.
vgg_model.output

# 6. Modify the output layer

In [None]:
# Build the custom classification head on top of the VGG16 feature maps.
x = vgg_model.output

# Two extra convolution + max-pooling stages on top of the VGG16 output.
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Flatten the feature maps into a vector for the dense layers.
x = Flatten()(x)

# Fully connected layers, each followed by dropout for regularization.
x = Dense(2048, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.2)(x)

# Output layer: one unit per class. Labels are one-hot (class_mode
# 'categorical') and the loss is categorical cross-entropy, so softmax is used
# to produce a probability distribution over the mutually exclusive classes.
# Sigmoid would score every class independently and not sum to 1.
prediction = Dense(num_classes, activation='softmax')(x)

In [None]:
# #### Adding a Flatten layer
# x = vgg_model.output
# x = Flatten()(x)

# # Adding dense layers with dropout and batch normalization
# x = Dense(2048, activation='relu')(x)
# x = BatchNormalization()(x)
# x = Dropout(0.3)(x)
# x = Dense(1024, activation='relu')(x)
# x = BatchNormalization()(x)
# x = Dropout(0.3)(x)
# x = Dense(512, activation='relu')(x)
# x = BatchNormalization()(x)
# x = Dropout(0.2)(x)

# # Output layer with sigmoid activation
# prediction = Dense(num_classes, activation='sigmoid')(x)

# 7. Create a new model with the new output layer

In [None]:
# Assemble the full network: from the VGG16 input to the custom output layer.
model = Model(
    inputs=vgg_model.input,
    outputs=prediction,
)
# Print a layer-by-layer overview of the assembled network.
model.summary()

# 8. Freeze Convolutional Layers (Optional Fine-Tuning):

In [None]:
# Mark every VGG16 layer as non-trainable so the initial training phase only
# updates the newly added head.
for base_layer in vgg_model.layers:
    base_layer.trainable = False

# 9. Compile the Model (Initial Training):

In [None]:
# Optimizer and compile settings for the initial training phase.
# NOTE: a learning-rate scheduler could be considered here.
rms_optimizer = RMSprop(rho=0.9, learning_rate=0.0001)
model.compile(
    optimizer=rms_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# 10. Image Data Generators (Augmentation for Training; Rescaling Only for Validation and Test):

In [None]:
# Training pipeline: scale pixel values by 1/255 and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,  # reserve 20% of the images for validation
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
# Validation pipeline: rescaling only, with the same 20% split fraction.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)
# Test pipeline: rescaling only, no split.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# 11. Prepare Data Generators:

In [None]:
# Training batches: the 'training' subset of train_dir, randomly augmented by
# train_datagen and shuffled.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    subset='training',  # the 80% of train_dir used for fitting
)

# Validation batches: the 'validation' subset of train_dir. Built from
# validation_datagen (rescaling only) rather than train_datagen, so val_loss is
# measured on un-augmented images. Both generators use validation_split=0.2,
# so the two subsets cover different files.
validation_generator = validation_datagen.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',  # the 20% of train_dir held out for validation
)

# Test batches: every image under validation_dir, in a fixed order
# (shuffle=False) so predictions line up with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode='categorical',
)

# 12. Early Stopping and Learning Rate Reduction:

In [None]:
# Stop training once val_loss has not improved for 3 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
# Cut the learning rate by 10x when val_loss plateaus. The patience is shorter
# than early stopping's so the reduced rate gets a chance before training is
# stopped, and min_lr sits below both learning rates used in this notebook
# (1e-4 and 1e-5); a floor of 1e-4 would prevent any reduction.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=2, min_lr=1e-7)
# Save the weights only when val_loss improves on the best value so far.
model_checkpoint = ModelCheckpoint('tomato_vgg16_model_sigmoid_update2.weights.h5', monitor='val_loss', save_best_only=True, save_weights_only=True, verbose=1)

# 13. Train the Model (Initial):

In [None]:
# Initial training phase: at most `epoch` epochs; the callbacks may end the
# run earlier, lower the learning rate, and checkpoint the weights.
epoch = 30
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epoch,
    callbacks=[early_stopping, reduce_lr, model_checkpoint],
)

# 14. Evaluate the Model after Initial Training:

In [None]:
# Evaluate on the test generator; score holds [loss, accuracy] in the order
# given to model.compile (loss first, then the "accuracy" metric).
score = model.evaluate(test_generator)
initial_loss, initial_accuracy = score[0], score[1]
print("Test loss (initial):", initial_loss)
print("Test accuracy (initial):", initial_accuracy)

# 15. Fine-Tuning Strategy

In [None]:
# Unfreeze the last 10 layers
for layer in vgg_model.layers[-10:]:
    layer.trainable = True

# 16. Recompile the model with a lower learning rate

In [None]:
# Create a new optimizer for fine-tuning
rms_optimizer = RMSprop(learning_rate=1e-5)
model.compile(loss="categorical_crossentropy", optimizer=rms_optimizer, metrics=["accuracy"])

# 17. Continue training the model

In [None]:
# Fine-tuning phase: same generators and callback objects as the initial run.
# `history` is reassigned, so it now refers to this second run only.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epoch,
    callbacks=[early_stopping, reduce_lr, model_checkpoint],
)

# 18. Evaluate the fine-tuned model

In [None]:
# Evaluate the fine-tuned model on the test generator; score is
# [loss, accuracy].
score = model.evaluate(test_generator)
finetuned_loss, finetuned_accuracy = score[0], score[1]
print("Test loss (fine-tuned):", finetuned_loss)
print("Test accuracy (fine-tuned):", finetuned_accuracy)

# 19. Get Classification Report

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Class names ordered by the integer index the generator assigned to them.
# They are read from the generator instead of being hard-coded, so they always
# match the folders that were actually loaded.
class_labels = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
label_ids = np.arange(len(class_labels))

# Predicted class = index of the highest score for each test image.
y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

# True class indices; these line up with the predictions because
# test_generator was created with shuffle=False.
y_true = test_generator.classes

# Confusion matrix over every class (explicit labels keep the matrix square
# even if some class never occurs in y_true or y_pred_classes).
cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)

# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1 report.
report = classification_report(y_true, y_pred_classes, labels=label_ids, target_names=class_labels)
print(report)

# 20. Load the best checkpointed weights into the model

In [None]:
# Load the saved weights file into the model. Presumably this is the file the
# ModelCheckpoint callback wrote, assuming the notebook's working directory is
# /kaggle/working -- TODO confirm.
best_weights_path = '/kaggle/working/tomato_vgg16_model_sigmoid_update2.weights.h5'
model.load_weights(best_weights_path)