In [2]:
pip install numpy matplotlib tensorflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import random
import numpy as np
from io import BytesIO

# Plotting and dealing with images
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf

Mixed precision policy: <DTypePolicy "mixed_float16">


# Preview Dataset

In [4]:
BASE_DIR = './Datasets/PlantVillage/train'

# List the class folders once. os.listdir() returns entries in arbitrary
# order, so sort them to make "first class" / "first image" reproducible,
# and skip any stray files that sit next to the class folders.
class_dirs = sorted(
    entry for entry in os.listdir(BASE_DIR)
    if os.path.isdir(os.path.join(BASE_DIR, entry))
)
print(f"\nsubdirectories within '{BASE_DIR}' dir: {class_dirs}")

# First class folder (in sorted order)
first_class = class_dirs[0]
first_class_path = os.path.join(BASE_DIR, first_class)

# First image inside that class folder (in sorted order)
first_image = sorted(os.listdir(first_class_path))[0]
sample_image_path = os.path.join(first_class_path, first_image)

# Load the image; the path keeps its own name so `sample_image` always
# refers to the loaded image and never to a string.
sample_image = tf.keras.utils.load_img(sample_image_path)

# Convert the image into its numpy array representation
sample_array = tf.keras.utils.img_to_array(sample_image)

print(f"Image shape: {sample_array.shape}")


subdirectories within './Datasets/PlantVillage/train' dir: ['Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy', 'Background_without_leaves', 'Blueberry___healthy', 'Cherry___healthy', 'Cherry___Powdery_mildew', 'Corn___Cercospora_leaf_spot Gray_leaf_spot', 'Corn___Common_rust', 'Corn___healthy', 'Corn___Northern_Leaf_Blight', 'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___healthy', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Orange___Haunglongbing_(Citrus_greening)', 'Peach___Bacterial_spot', 'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___healthy', 'Potato___Late_blight', 'Raspberry___healthy', 'Soybean___healthy', 'Squash___Powdery_mildew', 'Strawberry___healthy', 'Strawberry___Leaf_scorch', 'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___healthy', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two

# Preprocess dataset
Normalize the pixel values and split the dataset into training and validation sets.

In [11]:
def preprocessing_data(base_dir=None, image_size=(256, 256), batch_size=10,
                       validation_split=0.2, seed=42):
    """Load the image folder and split it into training and validation sets.

    Images are read from one sub-directory per class, resized to
    ``image_size``, batched, and rescaled by 1/255.

    Args:
        base_dir: Root directory containing one folder per class. Defaults to
            the notebook-level ``BASE_DIR`` when omitted.
        image_size: (height, width) every image is resized to.
        batch_size: Number of images per batch.
        validation_split: Fraction of the files reserved for validation.
        seed: Seed used for the split; it must be identical for both subsets
            so the training and validation files do not overlap.

    Returns:
        tuple: (train_dataset, val_dataset), each yielding
        (rescaled_images, integer_labels) batches.
    """
    if base_dir is None:
        base_dir = BASE_DIR

    # Everything except `subset` must be identical for the two calls, so the
    # arguments are defined once instead of being copy-pasted.
    loader_kwargs = dict(
        directory=base_dir,
        validation_split=validation_split,
        seed=seed,
        image_size=image_size,
        batch_size=batch_size,
        label_mode='int',
    )

    train_dataset = tf.keras.utils.image_dataset_from_directory(
        subset='training', **loader_kwargs
    )
    val_dataset = tf.keras.utils.image_dataset_from_directory(
        subset='validation', **loader_kwargs
    )

    # Normalize pixel values
    rescale_layer = tf.keras.layers.Rescaling(1.0 / 255)

    def _normalize(images, labels):
        return rescale_layer(images), labels

    # Run the rescaling in parallel and prefetch upcoming batches so input
    # preparation overlaps with the training step instead of blocking it.
    train_dataset = train_dataset.map(
        _normalize, num_parallel_calls=tf.data.AUTOTUNE
    ).prefetch(tf.data.AUTOTUNE)
    val_dataset = val_dataset.map(
        _normalize, num_parallel_calls=tf.data.AUTOTUNE
    ).prefetch(tf.data.AUTOTUNE)

    return train_dataset, val_dataset


# Define Model

Images have shape (256, 256, 3), so the model input is set to that shape. The network stacks 5 convolutional layers, each followed by a max-pooling layer.

In [6]:
model = tf.keras.models.Sequential([
    # Input: 256x256 images with 3 colour channels
    tf.keras.Input(shape=(256, 256, 3)),
    # Five Conv2D(3x3, ReLU) -> MaxPooling2D(2, 2) stages; only the number of
    # filters changes from one stage to the next.
    *[
        stage_layer
        for n_filters in (16, 32, 64, 64, 64)
        for stage_layer in (
            tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D(2, 2),
        )
    ],
    # Flatten the feature maps so they can feed the dense layers
    tf.keras.layers.Flatten(),
    # Hidden layer with 512 units
    tf.keras.layers.Dense(512, activation='relu'),
    # 39 output units with softmax: one probability per class.
    # dtype='float32' pins this layer's dtype explicitly.
    tf.keras.layers.Dense(39, dtype='float32', activation='softmax'),
])

In [7]:
# Print the model's layer-by-layer summary
model.summary()

# Compile the model

In [8]:

    # NOTE(review): this cell is indented one level. IPython strips a uniform
    # leading indent, but a plain .py export would raise IndentationError;
    # consider dedenting.
    # Configure training: Adam optimizer, sparse categorical cross-entropy
    # (labels are loaded with label_mode='int'), and accuracy as the metric.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',  
        metrics=['accuracy']
    )

Check that the dataset is compatible with the model architecture.

In [12]:
train_data, val_dataset = preprocessing_data()

# Get the first batch of images and labels
example_batch_images, example_batch_labels = next(iter(train_data.take(1)))

# Run one evaluation step as a smoke test of loss / output layer / label format.
try:
    model.evaluate(example_batch_images, example_batch_labels, verbose=False)
except Exception as err:
    # Catch Exception (not a bare except) so KeyboardInterrupt / SystemExit
    # still propagate, and report the real error instead of hiding it.
    print("Your model is not compatible with the dataset you defined earlier. Check that the loss function, last layer and label_mode are compatible with one another.")
    print(f"Underlying error: {type(err).__name__}: {err}")
else:
    predictions = model.predict(example_batch_images, verbose=False)
    print(f"predictions have shape: {predictions.shape}")

Found 61486 files belonging to 39 classes.
Using 49189 files for training.
Found 61486 files belonging to 39 classes.
Using 12297 files for validation.
predictions have shape: (10, 39)


# Train the model

In [13]:
# Train on train_data for 15 epochs, using val_dataset as validation data.
# The value returned by fit() is kept in `history`.
history = model.fit(
    train_data,
    validation_data=val_dataset,
    epochs=15,
)

Epoch 1/15
[1m   9/4919[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:19:58[0m 10s/step - accuracy: 0.0956 - loss: 3.5184    

KeyboardInterrupt: 