In [None]:
import tensorflow as tf
import os
import cv2
import imghdr
import random
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow_addons as tfa

In [None]:
# Root folder of the dataset; it is expected to hold one sub-folder per class.
data_dir = "../input/tomato-prime/tomato_small3"

healthy_path = os.path.join(data_dir, 'healthy')
sick_path = os.path.join(data_dir, 'sick')  # Adjust this string if your folder has a different name for sick tomatoes

# Full paths of every entry in each class folder. Each folder is listed exactly
# once and the listing is reused for the counts printed below.
healthy_images = [os.path.join(healthy_path, fname) for fname in os.listdir(healthy_path)]
sick_images = [os.path.join(sick_path, fname) for fname in os.listdir(sick_path)]

# The counts include every directory entry, whether or not it is a valid image.
print(f"Number of healthy images: {len(healthy_images)}")
print(f"Number of sick images: {len(sick_images)}")


In [None]:
# Image type names that the clean-up step further down accepts; a file whose
# detected type is not in this list gets removed.
image_exts = [
    "jpeg",
    "jpg",
    "JPG",
    "JPEG",
    "png",
]

In [None]:
# Dataset clean-up: walk every class folder under `data_dir`, delete files whose
# detected image type is not listed in `image_exts`, and report files that cannot
# be decoded or processed (those are reported only, never deleted).
for image_class in os.listdir(data_dir):
    class_path = os.path.join(data_dir, image_class)

    if not os.path.isdir(class_path):  # Check if it's a directory
        continue

    for image in os.listdir(class_path):
        image_path = os.path.join(class_path, image)
        try:
            tip = imghdr.what(image_path)
            if tip not in image_exts:
                print('Image not in ext list {}'.format(image_path))
                os.remove(image_path)
            elif cv2.imread(image_path) is None:
                # cv2.imread does not raise on a broken file, it returns None,
                # so the result has to be checked explicitly.
                print('Issue with image {}'.format(image_path))
        except Exception as e:
            # Best-effort clean-up: keep going, but say what went wrong
            # (e.g. a read-only input directory makes os.remove fail).
            print('Issue with image {}: {}'.format(image_path, e))


In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Build a batched input pipeline straight from the folder tree; every image is
# resized to 256x256 and paired with a label.
# Labels are presumably inferred from the class sub-folder names (Keras default) -
# inspect the function with ?? to confirm the exact details.
data = tf.keras.utils.image_dataset_from_directory(data_dir, image_size=(256, 256))


In [None]:
# The dataset is a lazy pipeline, not an in-memory array. To look at actual images
# we first wrap it in an iterator that yields batches as NumPy arrays.
data_iterator = data.as_numpy_iterator()

In [None]:
# Pull one batch (images, labels) from the pipeline.
# The builtin next() is used rather than the iterator's .next() method, which is
# not guaranteed to exist on every TensorFlow release.
batch = next(data_iterator)

In [None]:
# batch[0] holds the images as a NumPy array; its shape confirms the batching and
# the 256x256 resize requested from the Keras loader above.
batch[0].shape

In [None]:
# batch[1] holds the labels: one number per image in the batch.
# Presumably 0 = healthy and 1 = sick (class folders taken in alphabetical order) -
# TODO confirm with the visual check further down.

batch[1]

In [None]:
# Class balance of the sampled batch: each distinct label mapped to how many
# times it occurs.
unique, counts = np.unique(batch[1], return_counts=True)
print({label: count for label, count in zip(unique, counts)})


In [None]:
# Show the first four images of the batch, each titled with its numeric label, to
# double check **visually** which number (0 or 1) means sick and which healthy.
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
for axis, img, label in zip(ax, batch[0][:4], batch[1]):
    axis.imshow(img.astype(int))
    axis.title.set_text(label)

<h1>Pre-process data</h1>

In [None]:
# Rescale pixel values before training. Raw pixels range from 0 to 255, and neural
# networks generally train better on inputs in a small range near 0.
# Dividing every pixel by 255 maps the images into [0, 1]; labels pass through unchanged.
# Note: `data` is overwritten, so running this cell twice divides by 255 twice.

data = data.map(lambda images, labels: (images / 255, labels))

In [None]:
# Fetch one batch from the rescaled pipeline for inspection. Despite its name,
# `scaled_iterator` holds that single (images, labels) batch, not an iterator.
# The builtin next() is used rather than the iterator's .next() method, which is
# not guaranteed to exist on every TensorFlow release.
scaled_iterator = next(data.as_numpy_iterator())

In [None]:
# Display the rescaled batch: after the division by 255 the image values are
# expected to lie in the small range between 0 and 1.
scaled_iterator

<h1>Split Data</h1>

In [None]:
# Split sizes, counted in batches (len(data) is the number of batches in the pipeline).
num_batches = len(data)
train_size = int(num_batches * .7)
val_size = int(num_batches * .2)
# The test split takes whatever is left. Truncating 10% separately can silently
# drop batches (15 batches -> 10 + 3 + 1) or leave the test split empty on small
# datasets (5 batches -> 3 + 1 + 0).
test_size = num_batches - train_size - val_size

In [None]:
# Carve the batches into three consecutive slices: the first `train_size` batches
# for training, the next `val_size` for validation, then `test_size` for testing.
# NOTE(review): the Keras directory loader shuffles by default; if the pipeline is
# reshuffled on every pass, batches may move between these slices from one epoch
# to the next - confirm, and consider a fixed split if so.
train = data.take(train_size)
held_out = data.skip(train_size)
val = held_out.take(val_size)
test = held_out.skip(val_size).take(test_size)

<h1>Deep model</h1>

<h3>3.1 Build deep learning model</h3>

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model, load_model
from keras.models import Model, load_model
import tensorflow as tf


In [None]:
# Plain CNN for binary classification of 256x256 RGB images: six
# Conv(3x3, relu) + MaxPool(2x2) stages with a growing number of filters, then a
# dense head that ends in a single sigmoid unit.
base_model = Sequential()

# The first stage also declares the input shape of the network.
base_model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(256, 256, 3)))
base_model.add(MaxPool2D(pool_size=(2, 2)))

# The remaining stages differ only in their filter count.
for n_filters in (64, 64, 128, 128, 256):
    base_model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu'))
    base_model.add(MaxPool2D(pool_size=(2, 2)))

# Classifier head: flatten the feature maps, one hidden layer, one sigmoid output.
base_model.add(Flatten())
base_model.add(Dense(units=256, activation='relu'))
base_model.add(Dense(units=1, activation='sigmoid'))


In [None]:
# Adam optimizer with a learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# F1 score for the single output unit; predictions are thresholded at 0.5.
f1_metric = tfa.metrics.F1Score(num_classes=1, threshold=0.5)

# Binary cross-entropy matches the sigmoid output; accuracy and F1 are tracked
# alongside the loss.
base_model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', f1_metric],
)

In [None]:
# Print an overview of the model's layers.
base_model.summary()

<h3>3.2 Train</h3>



In [None]:
# Directory for TensorBoard logs. Defaults to a "logs" folder relative to the
# working directory (rather than an absolute path that only exists on one
# machine) and can be overridden with the TENSORBOARD_LOGDIR environment variable.
logdir = os.environ.get("TENSORBOARD_LOGDIR", "logs")

In [None]:
# Callback that writes TensorBoard logs into `logdir`.
# NOTE(review): this callback is not in the `callbacks` list passed to fit() in the
# training cell, so as written it has no effect - confirm whether that is intended.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
# Early stopping on validation accuracy: give up after 5 epochs without
# improvement and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',  # a higher validation accuracy counts as an improvement
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train for at most 50 epochs, validating on `val` after each one; the early
# stopping callback `es` may end the run sooner. `hist` keeps the per-epoch metrics.
hist = base_model.fit(
    train,
    validation_data=val,
    epochs=50,
    callbacks=[es],
)


<h3>3.3 Plot performance</h3>

In [None]:
# Training vs. validation loss per epoch.
fig = plt.figure()
for curve, colour in (('loss', 'teal'), ('val_loss', 'orange')):
    plt.plot(hist.history[curve], color=colour, label=curve)
fig.suptitle('Loss', fontsize=20)
plt.legend(loc="upper left")
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
fig = plt.figure()
for curve, colour in (('accuracy', 'teal'), ('val_accuracy', 'orange')):
    plt.plot(hist.history[curve], color=colour, label=curve)
fig.suptitle('Accuracy', fontsize=20)
plt.legend(loc="upper left")
plt.show()

<h1>Test model</h1>

In [None]:
# Score the trained model on the held-out test split and report each metric.
test_loss, test_accuracy, test_f1_score = base_model.evaluate(test)
for metric_name, metric_value in (
    ("Test Loss", test_loss),
    ("Test Accuracy", test_accuracy),
    ("Test F1 Score", test_f1_score),
):
    print(f"{metric_name}: {metric_value}")

In [None]:
# Evaluate the model on random test images scraped from Google, to check how well
# it generalizes to uncommon data.
# Note: this overwrites test_loss / test_accuracy / test_f1_score from the
# previous evaluation.

# 1. Specify the path to the new scraped data
new_data_dir = "../input/prime-test/scarped_test_data_raw"

# 2. Load and label the data. The image size is stated explicitly so that it
#    always matches the 256x256 input the model was built for, instead of relying
#    on the loader's default.
scraped_test_data = tf.keras.utils.image_dataset_from_directory(new_data_dir, image_size=(256, 256))

# 3. Rescale pixel values to [0, 1], the same pre-processing applied to the training data
new_pre_processed_data = scraped_test_data.map(lambda x,y: (x/255, y))

# 4. Evaluate the model on the new data
test_loss, test_accuracy, test_f1_score = base_model.evaluate(new_pre_processed_data)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1 Score: {test_f1_score}")



<h1>Saving the model</h1>

In [None]:
# Save the trained model to 'lunarleaf.h5' in the current working directory.
base_model.save('lunarleaf.h5')