In [None]:
%pip install split-folders

In [None]:
%pip install tensorflow==2.12.0

In [1]:
# Record the interpreter version so the environment is documented alongside the outputs.
import sys
print(sys.version)

3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]


In [2]:
# Confirm which TensorFlow build the kernel actually loaded.
import tensorflow as tf
print(tf.__version__)

2.12.0


In [3]:
import numpy as np
import os, splitfolders
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19, VGG16, MobileNetV2
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [4]:
# Shared hyperparameters for every model in this notebook.
IMAGE_SIZE = (224, 224, 3)  # model input shape; the first two entries are reused as the image target size
BATCH_SIZE = 32  # mini-batch size used for training
EPOCHS = 5  # maximum number of passes over the training data per model
NUM_CLASSES = 29  # width of the final softmax layer (one unit per class folder)

In [5]:
# Copy the raw dataset into train/ and val/ sub-folders (60% / 40%) with a fixed seed.
splitfolders.ratio(
    '/kaggle/input/raw-plant/botanify-360',
    output='/kaggle/working/tmp/plant-cv-images',
    seed=1337,
    ratio=(.6, .4),
)

Copying files: 10440 files [01:23, 125.73 files/s]


In [6]:
# Both streams multiply pixel values by 1/255; only the training stream is augmented.
pixel_scale = 1./255

# Validation images: rescaling only, no augmentation.
test_plant_datagen = ImageDataGenerator(rescale=pixel_scale)

# Training images: random shear and zoom, filling exposed pixels from the nearest edge.
train_plant_datagen = ImageDataGenerator(
  rescale=pixel_scale,
  shear_range=0.2,
  zoom_range=0.2,
  fill_mode='nearest',
)

In [7]:
# Stream augmented training batches from the split folders; labels are integer class
# indices ('sparse') to match the sparse_categorical_crossentropy loss used below.
train_plant_generator = train_plant_datagen.flow_from_directory(
  '/kaggle/working/tmp/plant-cv-images/train',
  target_size=IMAGE_SIZE[:2],
  batch_size=BATCH_SIZE,  # use the shared constant instead of a second hard-coded 32
  class_mode='sparse'
)

# shuffle=False keeps validation batches in a fixed order, so predictions taken from
# this generator line up with images and labels read from it in a later pass.
validation_plant_generator = test_plant_datagen.flow_from_directory(
  '/kaggle/working/tmp/plant-cv-images/val',
  target_size=IMAGE_SIZE[:2],
  batch_size=BATCH_SIZE,
  class_mode='sparse',
  shuffle=False
)

Found 6264 images belonging to 29 classes.
Found 4176 images belonging to 29 classes.


# Models

In [8]:
# Early-stopping callback shared by every training run below.
class CustomCallback(tf.keras.callbacks.Callback):
    """Stop training once training and validation accuracy both reach a threshold.

    Args:
        threshold: Accuracy (0-1) that both 'accuracy' and 'val_accuracy' must
            reach before training is stopped. Defaults to 0.95.
    """

    def __init__(self, threshold=0.95):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch metrics and request a stop when both meet the threshold."""
        # Keras may pass logs=None, and a metric can be absent (e.g. no validation data);
        # in either case there is nothing to compare, so keep training.
        logs = logs or {}
        accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')
        if accuracy is None or val_accuracy is None:
            return

        if accuracy >= self.threshold and val_accuracy >= self.threshold:
            print(f'\nTraining and validation accuracy both reached {self.threshold:.0%}; stopping training.')
            self.model.stop_training = True

callbacks = CustomCallback()

## CNN Model

In [None]:
# Baseline CNN trained from scratch: three convolution stages followed by a dense head.
model = Sequential([
    # Pad by 2 px per side so the 5x5 convolution keeps the spatial size.
    ZeroPadding2D(padding=(2, 2), input_shape=IMAGE_SIZE),
    Conv2D(16, (5, 5), strides=(1, 1), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(32, (3, 3), strides=(1, 1), activation='relu'),
    Conv2D(32, (3, 3), strides=(1, 1), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), strides=(1, 1), activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    # One probability per plant class.
    Dense(NUM_CLASSES, activation='softmax')
])

# Sparse loss because the generators yield integer class labels.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# The generator already yields batches, so batch_size is not passed to fit();
# Keras documents that it must not be specified for generator / Sequence inputs.
history = model.fit(
    train_plant_generator,
    epochs=EPOCHS,
    validation_data=validation_plant_generator,
    callbacks=[callbacks]
)

In [None]:
# Persist the trained baseline CNN to an .h5 file in the working directory.
model.save('cnn_model.h5')

In [None]:
# Report the compiled loss and accuracy of the CNN on the validation generator.
model.evaluate(validation_plant_generator)

## VGG16 Model

In [9]:
input_shape = IMAGE_SIZE

# ImageNet-pretrained VGG16 convolutional base without its original classifier head.
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base so only the new dense head is trained.
for layer in vgg16_base.layers:
    layer.trainable = False

inputs = Input(shape=input_shape)
# training=False runs the frozen base in inference mode.
x = vgg16_base(inputs, training=False)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)

model_vgg16 = Model(inputs=inputs, outputs=outputs)

model_vgg16.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_vgg16.summary()

# The generator already yields batches, so batch_size is not passed to fit();
# Keras documents that it must not be specified for generator / Sequence inputs.
history_vgg16 = model_vgg16.fit(
    train_plant_generator,
    epochs=EPOCHS,
    validation_data=validation_plant_generator,
    callbacks=[callbacks]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 256)               6422784   
                                                                 
 dense_1 (Dense)             (None, 29)                7453      
                                                                 
Total params: 21,144,925
Trainable params: 6,430,237
N

In [10]:
# Persist the trained VGG16 transfer-learning model to an .h5 file.
model_vgg16.save('botanify_model_vgg16.h5')

In [None]:
# Report the compiled loss and accuracy of the VGG16 model on the validation generator.
model_vgg16.evaluate(validation_plant_generator)

## VGG19 Model

In [None]:
input_shape = IMAGE_SIZE

# ImageNet-pretrained VGG19 convolutional base without its original classifier head.
vgg19_base = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base so only the new dense head is trained.
for layer in vgg19_base.layers:
    layer.trainable = False

inputs = Input(shape=input_shape)
# training=False runs the frozen base in inference mode.
x = vgg19_base(inputs, training=False)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)

model_vgg19 = Model(inputs=inputs, outputs=outputs)

model_vgg19.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_vgg19.summary()

# The generator already yields batches, so batch_size is not passed to fit();
# Keras documents that it must not be specified for generator / Sequence inputs.
history_vgg19 = model_vgg19.fit(
    train_plant_generator,
    epochs=EPOCHS,
    validation_data=validation_plant_generator,
    callbacks=[callbacks]
)

In [None]:
# Persist the trained VGG19 transfer-learning model to an .h5 file.
model_vgg19.save('model_vgg19.h5')

In [None]:
# Evaluate on the validation generator, like the other models. X_val / y_val are only
# built further down the notebook, so referencing them here fails on a fresh kernel.
model_vgg19.evaluate(validation_plant_generator)

## MobileNetV2 Model

In [None]:
input_shape = IMAGE_SIZE

# ImageNet-pretrained MobileNetV2 feature extractor without its original classifier head.
mobilenetv2_base = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base so only the new dense head is trained.
for layer in mobilenetv2_base.layers:
    layer.trainable = False

inputs = Input(shape=input_shape)
# training=False runs the frozen base (including its batch-norm layers) in inference mode.
x = mobilenetv2_base(inputs, training=False)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)

model_mobilenetv2 = Model(inputs=inputs, outputs=outputs)

model_mobilenetv2.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_mobilenetv2.summary()

# The generator already yields batches, so batch_size is not passed to fit();
# Keras documents that it must not be specified for generator / Sequence inputs.
history_mobilenetv2 = model_mobilenetv2.fit(
    train_plant_generator,
    epochs=EPOCHS,
    validation_data=validation_plant_generator,
    callbacks=[callbacks]
)

In [None]:
# Persist the trained MobileNetV2 transfer-learning model to an .h5 file.
model_mobilenetv2.save('model_mobilenetv2.h5')

In [None]:
# Report the compiled loss and accuracy of the MobileNetV2 model on the validation generator.
model_mobilenetv2.evaluate(validation_plant_generator)

# Visualization

In [None]:
# `train_folder` is never defined in this notebook, so take the class names from the
# training generator instead. Ordering by the assigned label index guarantees that
# class_names[i] is the name of the class the models output at position i.
class_indices = train_plant_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

## Plot CNN

In [None]:
# Accuracy per epoch for the baseline CNN: training (blue) vs. validation (red).
cnn_metrics = history.history
train_acc = cnn_metrics['accuracy']
val_acc = cnn_metrics['val_accuracy']
epoch_axis = range(1, len(train_acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, train_acc, 'b-', label='Training Accuracy')
ax.plot(epoch_axis, val_acc, 'r', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

plt.show()

In [None]:
# Loss per epoch for the baseline CNN: training (blue) vs. validation (red).
cnn_metrics = history.history
loss_train = cnn_metrics['loss']
loss_val = cnn_metrics['val_loss']
epoch_axis = range(1, len(loss_train) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, loss_train, 'b-', label='Training Loss')
ax.plot(epoch_axis, loss_val, 'r', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

## Plot VGG16

In [None]:
# Accuracy per epoch for the VGG16 model: training (blue) vs. validation (red).
vgg16_metrics = history_vgg16.history
train_acc = vgg16_metrics['accuracy']
val_acc = vgg16_metrics['val_accuracy']
epoch_axis = range(1, len(train_acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, train_acc, 'b-', label='Training Accuracy')
ax.plot(epoch_axis, val_acc, 'r', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

plt.show()

In [None]:
# Loss per epoch for the VGG16 model: training (blue) vs. validation (red).
vgg16_metrics = history_vgg16.history
loss_train = vgg16_metrics['loss']
loss_val = vgg16_metrics['val_loss']
epoch_axis = range(1, len(loss_train) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, loss_train, 'b-', label='Training Loss')
ax.plot(epoch_axis, loss_val, 'r', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

## Prediction CNN

In [None]:
# Read the whole validation set in a single pass so that images, labels and predictions
# share one ordering. Predicting from the generator and then re-reading it can pair an
# image with another sample's prediction, because the generator may reshuffle between passes.
image_batches = []
label_batches = []

validation_plant_generator.reset()
for _ in range(len(validation_plant_generator)):
    batch_images, batch_labels = next(validation_plant_generator)
    image_batches.append(batch_images)
    label_batches.append(batch_labels)

X_val = np.concatenate(image_batches)
y_val = np.concatenate(label_batches)

# Predict on exactly the images that are displayed below.
test_predictions = model.predict(X_val)

# Map the predicted class indices back to class names.
index_to_class = dict(enumerate(class_names))
test_predictions_classes = np.argmax(test_predictions, axis=1)
test_predictions_class_names = [index_to_class[idx] for idx in test_predictions_classes]

# Plot the first nine validation images (fewer if the set is smaller) with their prediction.
plt.figure(figsize=(10, 10))
for i in range(min(9, len(X_val))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_val[i])
    plt.title("Predicted: {}".format(test_predictions_class_names[i]))
    plt.axis('off')
plt.tight_layout()
plt.show()

## Confusion Matrix CNN

In [None]:
# Validation ground truth versus the CNN's arg-max predictions on the same images.
y_true_val = y_val
y_pred_val = np.argmax(model.predict(X_val), axis=1)

conf_mat_val = confusion_matrix(y_true_val, y_pred_val)

# Annotated heatmap: rows are true classes, columns are predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_mat_val,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix - Validation Data')
plt.show()

## Classification Report CNN

In [None]:
# Per-class metrics for the CNN on the validation set.
cnn_report = classification_report(y_true_val, y_pred_val, target_names=class_names)
print("Classification Report:\n", cnn_report)

## Prediction VGG16

In [None]:
# Read the whole validation set in a single pass so that images, labels and predictions
# share one ordering. Predicting from the generator and then re-reading it can pair an
# image with another sample's prediction, because the generator may reshuffle between passes.
image_batches = []
label_batches = []

validation_plant_generator.reset()
for _ in range(len(validation_plant_generator)):
    batch_images, batch_labels = next(validation_plant_generator)
    image_batches.append(batch_images)
    label_batches.append(batch_labels)

X_val = np.concatenate(image_batches)
y_val = np.concatenate(label_batches)

# Predict on exactly the images that are displayed below.
test_predictions_vgg16 = model_vgg16.predict(X_val)

# Map the predicted class indices back to class names.
index_to_class = dict(enumerate(class_names))
test_predictions_classes_vgg16 = np.argmax(test_predictions_vgg16, axis=1)
test_predictions_class_names_vgg16 = [index_to_class[idx] for idx in test_predictions_classes_vgg16]

# Plot the first nine validation images (fewer if the set is smaller) with their prediction.
plt.figure(figsize=(10, 10))
for i in range(min(9, len(X_val))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_val[i])
    plt.title("Predicted: {}".format(test_predictions_class_names_vgg16[i]))
    plt.axis('off')
plt.tight_layout()
plt.show()

## Confusion Matrix VGG16

In [None]:
# Validation ground truth versus VGG16's arg-max predictions on the same images.
y_true_val_vgg16 = y_val
y_pred_val_vgg16 = np.argmax(model_vgg16.predict(X_val), axis=1)

conf_mat_val_vgg16 = confusion_matrix(y_true_val_vgg16, y_pred_val_vgg16)

# Annotated heatmap: rows are true classes, columns are predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_mat_val_vgg16,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix - Validation Data')
plt.show()

## Classification Report VGG16

In [None]:
# Per-class metrics for VGG16 on the validation set.
vgg16_report = classification_report(y_true_val_vgg16, y_pred_val_vgg16, target_names=class_names)
print("Classification Report:\n", vgg16_report)

## Plot VGG19

In [None]:
# Accuracy per epoch for the VGG19 model: training (blue) vs. validation (red).
vgg19_metrics = history_vgg19.history
train_acc = vgg19_metrics['accuracy']
val_acc = vgg19_metrics['val_accuracy']
epoch_axis = range(1, len(train_acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, train_acc, 'b-', label='Training Accuracy')
ax.plot(epoch_axis, val_acc, 'r', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

plt.show()

In [None]:
# Loss per epoch for the VGG19 model: training (blue) vs. validation (red).
vgg19_metrics = history_vgg19.history
loss_train = vgg19_metrics['loss']
loss_val = vgg19_metrics['val_loss']
epoch_axis = range(1, len(loss_train) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, loss_train, 'b-', label='Training Loss')
ax.plot(epoch_axis, loss_val, 'r', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

## Prediction VGG19

In [None]:
# Read the whole validation set in a single pass so that images, labels and predictions
# share one ordering. Predicting from the generator and then re-reading it can pair an
# image with another sample's prediction, because the generator may reshuffle between passes.
image_batches = []
label_batches = []

validation_plant_generator.reset()
for _ in range(len(validation_plant_generator)):
    batch_images, batch_labels = next(validation_plant_generator)
    image_batches.append(batch_images)
    label_batches.append(batch_labels)

X_val = np.concatenate(image_batches)
y_val = np.concatenate(label_batches)

# Predict on exactly the images that are displayed below.
test_predictions_vgg19 = model_vgg19.predict(X_val)

# Map the predicted class indices back to class names.
index_to_class = dict(enumerate(class_names))
test_predictions_classes_vgg19 = np.argmax(test_predictions_vgg19, axis=1)
test_predictions_class_names_vgg19 = [index_to_class[idx] for idx in test_predictions_classes_vgg19]

# Plot the first nine validation images (fewer if the set is smaller) with their prediction.
plt.figure(figsize=(10, 10))
for i in range(min(9, len(X_val))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_val[i])
    plt.title("Predicted: {}".format(test_predictions_class_names_vgg19[i]))
    plt.axis('off')
plt.tight_layout()
plt.show()

## Confusion Matrix VGG19

In [None]:
# Validation ground truth versus VGG19's arg-max predictions on the same images.
y_true_val_vgg19 = y_val
y_pred_val_vgg19 = np.argmax(model_vgg19.predict(X_val), axis=1)

conf_mat_val_vgg19 = confusion_matrix(y_true_val_vgg19, y_pred_val_vgg19)

# Annotated heatmap: rows are true classes, columns are predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_mat_val_vgg19,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix - Validation Data')
plt.show()

## Classification Report VGG19

In [None]:
# Per-class metrics for VGG19 on the validation set.
vgg19_report = classification_report(y_true_val_vgg19, y_pred_val_vgg19, target_names=class_names)
print("Classification Report:\n", vgg19_report)

## Plot MobileNetV2

In [None]:
# Accuracy per epoch for the MobileNetV2 model: training (blue) vs. validation (red).
train_acc = history_mobilenetv2.history['accuracy']
val_acc = history_mobilenetv2.history['val_accuracy']

epochs = range(1, len(train_acc) + 1)

plt.plot(epochs, train_acc, 'b-', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
# The plotted series are accuracies, so label the axis accordingly (it previously read 'Loss').
plt.ylabel('Accuracy')
plt.legend()

plt.show()

In [None]:
# Loss per epoch for the MobileNetV2 model: training (blue) vs. validation (red).
mobilenetv2_metrics = history_mobilenetv2.history
loss_train = mobilenetv2_metrics['loss']
loss_val = mobilenetv2_metrics['val_loss']
epoch_axis = range(1, len(loss_train) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, loss_train, 'b-', label='Training Loss')
ax.plot(epoch_axis, loss_val, 'r', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

## Prediction MobileNetV2

In [None]:
# Read the whole validation set in a single pass so that images, labels and predictions
# share one ordering. Predicting from the generator and then re-reading it can pair an
# image with another sample's prediction, because the generator may reshuffle between passes.
image_batches = []
label_batches = []

validation_plant_generator.reset()
for _ in range(len(validation_plant_generator)):
    batch_images, batch_labels = next(validation_plant_generator)
    image_batches.append(batch_images)
    label_batches.append(batch_labels)

X_val = np.concatenate(image_batches)
y_val = np.concatenate(label_batches)

# Predict on exactly the images that are displayed below.
test_predictions_mobilenetv2 = model_mobilenetv2.predict(X_val)

# Map the predicted class indices back to class names.
index_to_class = dict(enumerate(class_names))
test_predictions_classes_mobilenetv2 = np.argmax(test_predictions_mobilenetv2, axis=1)
test_predictions_class_names_mobilenetv2 = [index_to_class[idx] for idx in test_predictions_classes_mobilenetv2]

# Plot the first nine validation images (fewer if the set is smaller) with their prediction.
plt.figure(figsize=(10, 10))
for i in range(min(9, len(X_val))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_val[i])
    plt.title("Predicted: {}".format(test_predictions_class_names_mobilenetv2[i]))
    plt.axis('off')
plt.tight_layout()
plt.show()

## Confusion Matrix MobileNetV2

In [None]:
# Validation ground truth versus MobileNetV2's arg-max predictions on the same images.
y_true_val_mobilenetv2 = y_val
y_pred_val_mobilenetv2 = np.argmax(model_mobilenetv2.predict(X_val), axis=1)

conf_mat_val_mobilenetv2 = confusion_matrix(y_true_val_mobilenetv2, y_pred_val_mobilenetv2)

# Annotated heatmap: rows are true classes, columns are predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_mat_val_mobilenetv2,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix - Validation Data')
plt.show()

## Classification Report MobileNetV2

In [None]:
# Per-class metrics for MobileNetV2 on the validation set.
mobilenetv2_report = classification_report(y_true_val_mobilenetv2, y_pred_val_mobilenetv2, target_names=class_names)
print("Classification Report:\n", mobilenetv2_report)

# Test Unseen Data

In [None]:
# Score the baseline CNN on the "unseen" images: one sub-folder per class, and a
# prediction counts as correct when the predicted class name equals the folder name.
unseen_root = '/kaggle/input/raw-data-again/unseen/unseen'
test_folder = os.listdir(unseen_root)

score_result = {}  # per-class "correct/total" strings

total_score = 0
total_data = 0

for folder in test_folder:
  score = 0
  i = 0
  plant = os.listdir(f'{unseen_root}/{folder}')
  for uploaded in plant:
    path = f'{unseen_root}/{folder}/{uploaded}'
    # Load at the resolution the model was built for; the previous 150x150 size
    # does not match the model's 224x224 input.
    img = image.load_img(path, target_size=IMAGE_SIZE[:2])

    # Apply the same 1/255 rescaling the training and validation generators use,
    # then add the batch dimension.
    x = image.img_to_array(img) / 255.0
    images = np.expand_dims(x, axis=0)

    classes = model.predict(images, verbose=0)
    out = np.argmax(classes)
    print(class_names[out])

    if class_names[out] == folder:
      print(uploaded, folder)
      score += 1
    i += 1
    total_data += 1

  total_score += score
  score_result[folder] = f'{score}/{i}'

# Avoid a ZeroDivisionError when the unseen folder contains no images.
accuracy_pct = (total_score / total_data) * 100 if total_data else 0.0
print(f"Score Result: {score_result}\n Total Score: {total_score}/{total_data} ({accuracy_pct}%)")

In [None]:
# Score the VGG16 model on the "unseen" images: one sub-folder per class, and a
# prediction counts as correct when the predicted class name equals the folder name.
unseen_root = '/kaggle/input/raw-data-again/unseen/unseen'
test_folder = os.listdir(unseen_root)

score_result = {}  # per-class "correct/total" strings

total_score = 0
total_data = 0

for folder in test_folder:
  score = 0
  i = 0
  plant = os.listdir(f'{unseen_root}/{folder}')
  for uploaded in plant:
    path = f'{unseen_root}/{folder}/{uploaded}'
    # Load at the resolution the model was built for; the previous 100x100 size
    # does not match the model's 224x224 input.
    img = image.load_img(path, target_size=IMAGE_SIZE[:2])

    # Apply the same 1/255 rescaling the training and validation generators use,
    # then add the batch dimension.
    x = image.img_to_array(img) / 255.0
    images = np.expand_dims(x, axis=0)

    classes = model_vgg16.predict(images, verbose=0)
    out = np.argmax(classes)
    print(out)
    print(class_names[out])

    if class_names[out] == folder:
      print(uploaded, folder)
      score += 1
    i += 1
    total_data += 1

  total_score += score
  score_result[folder] = f'{score}/{i}'

# Avoid a ZeroDivisionError when the unseen folder contains no images.
accuracy_pct = (total_score / total_data) * 100 if total_data else 0.0
print(f"Score Result: {score_result}\n Total Score: {total_score}/{total_data} ({accuracy_pct}%)")

In [None]:
# Score the VGG19 model on the "unseen" images: one sub-folder per class, and a
# prediction counts as correct when the predicted class name equals the folder name.
unseen_root = '/kaggle/input/raw-data-again/unseen/unseen'
test_folder = os.listdir(unseen_root)

score_result = {}  # per-class "correct/total" strings

total_score = 0
total_data = 0

for folder in test_folder:
  score = 0
  i = 0
  plant = os.listdir(f'{unseen_root}/{folder}')
  for uploaded in plant:
    path = f'{unseen_root}/{folder}/{uploaded}'
    # Load at the resolution the model was built for.
    img = image.load_img(path, target_size=IMAGE_SIZE[:2])

    # Apply the same 1/255 rescaling the training and validation generators use
    # (previously raw 0-255 pixel values were fed to the model), then add the batch dimension.
    x = image.img_to_array(img) / 255.0
    images = np.expand_dims(x, axis=0)

    classes = model_vgg19.predict(images, verbose=0)
    out = np.argmax(classes)
    print(class_names[out])

    if class_names[out] == folder:
      print(uploaded, folder)
      score += 1
    i += 1
    total_data += 1

  total_score += score
  score_result[folder] = f'{score}/{i}'

# Avoid a ZeroDivisionError when the unseen folder contains no images.
accuracy_pct = (total_score / total_data) * 100 if total_data else 0.0
print(f"Score Result: {score_result}\n Total Score: {total_score}/{total_data} ({accuracy_pct}%)")

In [None]:
# Score the MobileNetV2 model on the "unseen" images: one sub-folder per class, and a
# prediction counts as correct when the predicted class name equals the folder name.
unseen_root = '/kaggle/input/raw-data-again/unseen/unseen'
test_folder = os.listdir(unseen_root)

score_result = {}  # per-class "correct/total" strings

total_score = 0
total_data = 0

for folder in test_folder:
  score = 0
  i = 0
  plant = os.listdir(f'{unseen_root}/{folder}')
  for uploaded in plant:
    path = f'{unseen_root}/{folder}/{uploaded}'
    # Load at the resolution the model was built for.
    img = image.load_img(path, target_size=IMAGE_SIZE[:2])

    # Apply the same 1/255 rescaling the training and validation generators use
    # (previously raw 0-255 pixel values were fed to the model), then add the batch dimension.
    x = image.img_to_array(img) / 255.0
    images = np.expand_dims(x, axis=0)

    classes = model_mobilenetv2.predict(images, verbose=0)
    out = np.argmax(classes)
    print(class_names[out])

    if class_names[out] == folder:
      print(uploaded, folder)
      score += 1
    i += 1
    total_data += 1

  total_score += score
  score_result[folder] = f'{score}/{i}'

# Avoid a ZeroDivisionError when the unseen folder contains no images.
accuracy_pct = (total_score / total_data) * 100 if total_data else 0.0
print(f"Score Result: {score_result}\n Total Score: {total_score}/{total_data} ({accuracy_pct}%)")

In [None]:
# Dump the class names to a text file as one line of single-quoted labels,
# each followed by a comma and a space.
with open('class_name.txt', 'w') as f:
    f.write(''.join(f"'{item}', " for item in class_names))