In [None]:
!pip install -q kaggle
!pip install opendatasets

In [None]:
import os
import shutil
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers
from google.colab import files
import opendatasets as od

In [None]:
files.upload()

In [4]:
od.download(
    "https://www.kaggle.com/datasets/emmarex/plantdisease")

Dataset URL: https://www.kaggle.com/datasets/emmarex/plantdisease
Downloading plantdisease.zip to ./plantdisease


100%|██████████| 658M/658M [00:03<00:00, 201MB/s]





In [None]:
original_dir = '/content/plantdisease/plantvillage/PlantVillage'
processed_dir = '/content/plantvillage_gs'
split_ratio = 0.2
IMG_SIZE = (128, 128)

if os.path.exists(processed_dir):
    shutil.rmtree(processed_dir)
os.makedirs(processed_dir, exist_ok=True)

def augment_and_save(img, save_path):
    if random.random() > 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
    if random.random() > 0.5:
        img = img.transpose(Image.FLIP_TOP_BOTTOM)
    angle = random.choice([0, 90, 180, 270])
    if angle != 0:
        img = img.rotate(angle)
    img.save(save_path)

for class_name in os.listdir(original_dir):
    class_dir = os.path.join(original_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    images = [img for img in os.listdir(class_dir) if img.lower().endswith(('.jpg', '.jpeg', '.png'))]
    random.shuffle(images)
    split_idx = int(len(images) * (1 - split_ratio))
    train_imgs = images[:split_idx]
    val_imgs = images[split_idx:]

    for split, split_imgs in zip(['train', 'val'], [train_imgs, val_imgs]):
        split_class_dir = os.path.join(processed_dir, split, class_name)
        os.makedirs(split_class_dir, exist_ok=True)
        for img in split_imgs:
            src_img_path = os.path.join(class_dir, img)
            dst_img_path = os.path.join(split_class_dir, img)
            try:
                im = Image.open(src_img_path).convert('L').resize(IMG_SIZE)
                augment_and_save(im, dst_img_path)
            except Exception as e:
                print(f"Error processing {src_img_path}: {e}")
                continue

print('All images converted to grayscale, augmented, and saved.')

All images converted to grayscale, augmented, and saved.


In [6]:
BATCH_SIZE = 32

def make_conv_autoencoder_dataset(directory, img_size, batch_size):
    ds = tf.keras.utils.image_dataset_from_directory(
        directory,
        labels=None,
        image_size=img_size,
        batch_size=batch_size,
        shuffle=True,
        color_mode='grayscale'
    )
    ds = ds.map(lambda x: (x / 255.0, x / 255.0))
    return ds

train_dir = "/content/plantvillage_gs/train"
val_dir = "/content/plantvillage_gs/val"
train_ds = make_conv_autoencoder_dataset(train_dir, IMG_SIZE, BATCH_SIZE)
val_ds = make_conv_autoencoder_dataset(val_dir, IMG_SIZE, BATCH_SIZE)

Found 16504 files.
Found 4134 files.


In [7]:
def build_conv_autoencoder(img_shape):
    inputs = tf.keras.Input(shape=img_shape)
    x = layers.Conv2D(64, (3,3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2,2), padding='same')(x)
    x = layers.Conv2D(32, (3,3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2,2), padding='same')(x)
    x = layers.Conv2D(8, (3,3), activation='relu', padding='same')(x)
    encoded = layers.MaxPooling2D((2,2), padding='same')(x)

    x = layers.Conv2D(8, (3,3), activation='relu', padding='same')(encoded)
    x = layers.UpSampling2D((2,2))(x)
    x = layers.Conv2D(32, (3,3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2,2))(x)
    x = layers.Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2,2))(x)
    outputs = layers.Conv2D(1, (3,3), activation='sigmoid', padding='same')(x)

    autoencoder = tf.keras.Model(inputs, outputs)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

autoencoder = build_conv_autoencoder((128, 128, 1))

In [8]:
autoencoder.summary()

In [9]:
EPOCHS = 20
history = autoencoder.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    ]
)

Epoch 1/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 39ms/step - loss: 0.0099 - val_loss: 0.0039
Epoch 2/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 30ms/step - loss: 0.0037 - val_loss: 0.0033
Epoch 3/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 30ms/step - loss: 0.0033 - val_loss: 0.0031
Epoch 4/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - loss: 0.0031 - val_loss: 0.0030
Epoch 5/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 30ms/step - loss: 0.0030 - val_loss: 0.0029
Epoch 6/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 31ms/step - loss: 0.0028 - val_loss: 0.0028
Epoch 7/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - loss: 0.0028 - val_loss: 0.0027
Epoch 8/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - loss: 0.0027 - val_loss: 0.0026
Epoch 9/20
[1m516/516[

In [12]:
def reconstruction_errors(model, X):
    X_pred = model.predict(X, batch_size=32)
    errors = np.mean(np.abs(X - X_pred), axis=1)
    return errors

X_val_tomato_flat = np.concatenate([x for x, _ in val_ds], axis=0)
err_tomato = reconstruction_errors(autoencoder, X_val_tomato_flat)

[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step


In [14]:
print(f"Tomato Error: {err_tomato.max()}")

Tomato Error: 0.12488341331481934


In [11]:
autoencoder.save('/content/autoencoder.h5')
print('Autoencoder saved!')



Autoencoder saved!
