In [46]:
# 🍅 Tomato Leaf Disease Classification

import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import numpy as np
import os

# Constants
IMAGE_SIZE = 128   # images are resized to IMAGE_SIZE x IMAGE_SIZE when loaded
BATCH_SIZE = 32    # number of images per dataset batch
EPOCHS = 20        # number of passes over the training data
# The dataset location is machine-specific. Set the TOMATO_DATASET_PATH
# environment variable to point elsewhere without editing the notebook;
# the original path is kept as the default.
DATASET_PATH = os.environ.get(
    "TOMATO_DATASET_PATH",
    "R:/coding/Disease_Prediction/Tomato_img",
)


In [47]:
# NOTE: the pixel normalization that used to live in this cell was removed.
# `train_ds` is only created by the dataset-loading cell further down, so
# mapping it here raised NameError on a fresh kernel (Restart & Run All).
# Scaling to float32 / 255 is applied later in the notebook via `safe_cast`.


In [48]:
# Load dataset with validation split
# Both calls use the same directory, split fraction and seed; only `subset`
# differs, selecting the training or the validation portion of the files.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_PATH,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_PATH,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

# Save class names now, while `train_ds` is still the object returned by the
# loader (later cells rebind `train_ds` to the result of `.map(...)`).
class_names = train_ds.class_names

# Pixel scaling is intentionally NOT applied in this cell. `safe_cast` is
# defined in a later cell, and that cell maps it over both datasets; calling
# it here failed on a fresh kernel and scaled the pixels a second time when
# the later cell ran.



Found 6000 files belonging to 6 classes.
Using 4800 files for training.
Found 6000 files belonging to 6 classes.
Using 1200 files for validation.


In [49]:
# Normalization applied to every (image, label) element of the datasets
def safe_cast(image, label):
    """Convert ``image`` to float32 and divide by 255; return the label unchanged.

    Args:
        image: image tensor (assumed to hold 0-255 pixel values as produced
            by the image loader -- TODO confirm).
        label: label tensor, passed through untouched.

    Returns:
        A ``(image, label)`` tuple where ``image`` is float32 and scaled
        by 1/255.

    Note:
        No ``try``/``except`` is used here. When this function is passed to
        ``Dataset.map`` it is traced into a graph, so a Python exception
        handler cannot intercept failures that happen while elements are
        processed; a bare ``except`` would only hide genuine errors raised
        while tracing.
    """
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# AUTOTUNE was not defined in any visible cell; bind it explicitly so this
# cell works on a fresh kernel. It lets tf.data pick the parallelism level.
AUTOTUNE = tf.data.AUTOTUNE

# Run this cell once per dataset load: each execution divides the pixel
# values by 255 again because `train_ds` / `val_ds` are rebound in place.
train_ds = train_ds.map(safe_cast, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(safe_cast, num_parallel_calls=AUTOTUNE)


In [51]:
# CNN Model
# One output unit per class directory found by the dataset loader.
n_classes = len(class_names)
model = models.Sequential([
    # Declaring the input shape builds the model immediately, so
    # model.summary() below can report output shapes and parameter counts.
    # 3 channels: the loader is called without `color_mode` (assumed to default to RGB).
    layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),

    # Three convolution + max-pooling stages with 32, 64 and 128 filters.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Classification head: flatten, one hidden dense layer, softmax output.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

# 'sparse_categorical_crossentropy' expects integer class indices as labels
# (assumed to match what the dataset loader yields -- TODO confirm).
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [52]:
from PIL import Image
import os

def verify_images_in_folder(folder_path):
    """Recursively check every file under ``folder_path`` with PIL.

    Each file is opened with ``Image.open`` and checked with ``verify()``.
    Any file for which either step raises is reported, which includes files
    that are not images at all.

    Args:
        folder_path: root directory to walk.

    Returns:
        List of paths (root joined with file name) that failed to open or
        verify, in the order ``os.walk`` visited them. Empty if none failed.
    """
    bad_images = []
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # Build the path outside the try block so the except branch
            # always refers to the file currently being checked.
            img_path = os.path.join(root, file)
            try:
                # The context manager closes the file handle even when
                # verify() raises; a handle left open can prevent the file
                # from being deleted afterwards on Windows.
                with Image.open(img_path) as img:
                    img.verify()  # Verify image integrity
            except Exception:
                # Deliberately broad: any failure marks the file as bad.
                bad_images.append(img_path)
    return bad_images

# Scan the tomato dataset directory for files PIL cannot verify.
dataset_path = "R:/coding/Disease_Prediction/Tomato_img"
bad_files = verify_images_in_folder(folder_path=dataset_path)

print("🧹 Found bad files:", bad_files)

# Remove every file that failed verification. This runs unconditionally
# whenever the cell is executed, and the deletion cannot be undone.
for bad_path in bad_files:
    os.remove(bad_path)



🧹 Found bad files: []


In [53]:
# Fit the compiled model on the training dataset for EPOCHS epochs,
# evaluating on the validation dataset; the returned History object is
# kept in `history`.
history = model.fit(x=train_ds, epochs=EPOCHS, validation_data=val_ds)


Epoch 1/20


OverflowError: Python int too large to convert to C long