In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import argparse
from pathlib import Path

# Define the classes (diseases and healthy)
classes = [
    'Tomato_Late_blight',
    'Tomato_Leaf_Mold',
    'Tomato_Septoria_leaf_spot',
    'Tomato_Early_blight',
    'Tomato_Spider_mites_Two_spotted_spider_mite',
    'Tomato__Target_Spot',
    'Tomato_Bacterial_spot',
    'Tomato__Tomato_mosaic_virus',
    'Tomato_healthy',
    'Tomato__Tomato_YellowLeaf__Curl_Virus'
]

num_classes = len(classes)
class_to_index = {cls: idx for idx, cls in enumerate(classes)}
index_to_class = {idx: cls for idx, cls in enumerate(classes)}

# Brief descriptions of problems (for reference)
disease_descriptions = {
    'Tomato_Late_blight': 'Fungal disease causing dark, water-soaked lesions on leaves, stems, and fruits; can lead to plant death if untreated.',
    'Tomato_Leaf_Mold': 'Fungal infection (Passalora fulva) resulting in yellow spots on upper leaf surfaces and grayish mold on undersides; reduces photosynthesis.',
    'Tomato_Septoria_leaf_spot': 'Fungal spots with dark borders and yellow halos on lower leaves; spreads in wet conditions, defoliating the plant.',
    'Tomato_Early_blight': 'Fungal disease (Alternaria solani) with concentric rings on leaves and stems; affects older plants, leading to yield loss.',
    'Tomato_Spider_mites_Two_spotted_spider_mite': 'Pest infestation causing stippling and webbing on leaves; mites suck sap, leading to yellowing and leaf drop.',
    'Tomato__Target_Spot': 'Fungal spots (Corynespora cassiicola) resembling targets on leaves; can cause defoliation and fruit spotting.',
    'Tomato_Bacterial_spot': 'Bacterial infection causing small, water-soaked spots that turn dark with yellow halos; affects leaves and fruits.',
    'Tomato__Tomato_mosaic_virus': 'Viral disease causing mottled, mosaic-like patterns on leaves; stunts growth and reduces fruit quality.',
    'Tomato_healthy': 'No disease detected; plant is healthy.',
    'Tomato__Tomato_YellowLeaf__Curl_Virus': 'Viral infection transmitted by whiteflies, causing upward leaf curling, yellowing, and stunted growth.'
}

def create_data_generators(dataset_dir, img_height=256, img_width=256, batch_size=128, validation_split=0.2):
    """
    Create training and validation data generators with augmentation.
    
    Args:
        dataset_dir (str): Path to the dataset directory.
        img_height, img_width (int): Target image dimensions.
        batch_size (int): Batch size for generators.
        validation_split (float): Fraction of data for validation.
    
    Returns:
        tuple: (train_generator, validation_generator)
    """
    if not os.path.exists(dataset_dir):
        raise FileNotFoundError(f"Dataset directory '{dataset_dir}' does not exist. Please provide a valid path.")
    
    # Training data generator with augmentation
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest',
        validation_split=validation_split
    )
    
    # Validation data generator (no augmentation, just rescaling)
    val_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
    
    train_generator = train_datagen.flow_from_directory(
        dataset_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        shuffle=True
    )
    
    validation_generator = val_datagen.flow_from_directory(
        dataset_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )
    
    print(f"Found {train_generator.samples} training images and {validation_generator.samples} validation images.")
    print(f"Classes found: {train_generator.class_indices}")
    
    return train_generator, validation_generator

def build_cnn_model(img_height, img_width, num_classes):
    """
    Build an improved CNN model with BatchNormalization for better training stability.
    
    Args:
        img_height, img_width (int): Input image dimensions.
        num_classes (int): Number of output classes.
    
    Returns:
        Sequential: Compiled Keras model.
    """
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        Flatten(),
        Dropout(0.5),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    model.summary()
    return model

def train_model(model, train_generator, validation_generator, epochs=5, model_save_path='tomato_disease_cnn_5.h5'):
    """
    Train the model with callbacks for early stopping, checkpointing, and LR reduction.
    
    Args:
        model: Compiled Keras model.
        train_generator: Training data generator.
        validation_generator: Validation data generator.
        epochs (int): Maximum number of epochs.
        model_save_path (str): Path to save the best model.
    
    Returns:
        History: Training history object.
    """
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
        ModelCheckpoint(model_save_path, monitor='val_accuracy', save_best_only=True, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)
    ]
    
    history = model.fit(
        train_generator,
        steps_per_epoch=max(1, train_generator.samples // train_generator.batch_size),
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=max(1, validation_generator.samples // validation_generator.batch_size),
        callbacks=callbacks,
        verbose=1
    )
    
    return history

def plot_training_history(history):
    """
    Plot accuracy and loss curves from training history.
    
    Args:
        history: Training history object.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    
    # Plot accuracy
    ax1.plot(history.history['accuracy'], label='Training Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    
    # Plot loss
    ax2.plot(history.history['loss'], label='Training Loss')
    ax2.plot(history.history['val_loss'], label='Validation Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    
    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

def predict_disease(image_path, model, top_k=3):
    """
    Predict disease on a single image with top-k predictions and confidence.
    
    Args:
        image_path (str): Path to the input image.
        model: Trained Keras model.
        top_k (int): Number of top predictions to return.
    
    Returns:
        dict: Dictionary with top predictions, confidences, and description for the top one.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file '{image_path}' does not exist.")
    
    # Load and preprocess the image
    try:
        img = Image.open(image_path).convert('RGB').resize((model.input_shape[1:3]))
        img_array = np.array(img, dtype=np.float32) / 255.0
        img_array = np.expand_dims(img_array, axis=0)
    except Exception as e:
        raise ValueError(f"Error processing image: {e}")
    
    # Predict
    predictions = model.predict(img_array, verbose=0)
    top_indices = np.argsort(predictions[0])[::-1][:top_k]
    
    results = {
        'top_predictions': [
            {'class': index_to_class[idx], 'confidence': float(predictions[0][idx])}
            for idx in top_indices
        ],
        'top_prediction_description': disease_descriptions.get(index_to_class[top_indices[0]], 'Unknown disease.')
    }
    
    return results

def main(dataset_dir, epochs=20, batch_size=128, img_size=256, train_mode=True, image_path=None):
    """
    Main function to run the entire pipeline.
    
    Args:
        dataset_dir (str): Path to dataset.
        epochs (int): Training epochs.
        batch_size (int): Batch size.
        img_size (int): Image size (square).
        train_mode (bool): Whether to train or just predict.
        image_path (str, optional): Path to test image if not training.
    """
    img_height, img_width = img_size, img_size
    
    if train_mode:
        # Create generators
        train_gen, val_gen = create_data_generators(dataset_dir, img_height, img_width, batch_size)
        
        # Build model
        model = build_cnn_model(img_height, img_width, num_classes)
        
        # Train
        history = train_model(model, train_gen, val_gen, epochs)
        
        # Plot history
        plot_training_history(history)
        
        print("Training completed. Model saved as 'tomato_disease_cnn.h5'.")
        
        # Example prediction (uncomment and provide path if dataset has test images)
        # results = predict_disease('path/to/test/image.jpg', model)
        # print(f"Top Prediction: {results['top_predictions'][0]['class']} (Confidence: {results['top_predictions'][0]['confidence']:.2f})")
        # print(f"Description: {results['top_prediction_description']}")
    
    else:
        # Load model for prediction only
        if not os.path.exists('tomato_disease_cnn.h5'):
            raise FileNotFoundError("Trained model 'tomato_disease_cnn.h5' not found. Train the model first.")
        
        model = tf.keras.models.load_model('tomato_disease_cnn.h5')
        results = predict_disease(image_path, model)
        
        print("Prediction Results:")
        for pred in results['top_predictions']:
            print(f"- {pred['class']}: {pred['confidence']:.2f}")
        
        top_pred = results['top_predictions'][0]
        if top_pred['class'] == 'Tomato_healthy':
            print("The plant is healthy!")
        else:
            print(f"Problem: {results['top_prediction_description']}")


In [5]:
import sys

if __name__ == "__main__":
    if "ipykernel" in sys.modules:
        # Running inside Jupyter Notebook
        main(
            dataset_dir="/home/divyansh/Music/archive/PlantVillage",
            epochs=5,
            batch_size=128,
            img_size=256,
            train_mode=True
        )
    else:
        # Running from command line
        parser = argparse.ArgumentParser(description="Tomato Disease Detection CNN")
        parser.add_argument('--dataset_dir', type=str, required=True, help="Path to dataset directory")
        parser.add_argument('--epochs', type=int, default=20)
        parser.add_argument('--batch_size', type=int, default=32)
        parser.add_argument('--img_size', type=int, default=224)
        parser.add_argument('--predict_only', action='store_true')
        parser.add_argument('--image_path', type=str)
        args = parser.parse_args()
        
        if args.predict_only:
            if not args.image_path:
                raise ValueError("Image path is required in prediction mode.")
            main(args.dataset_dir, args.epochs, args.batch_size, args.img_size, train_mode=False, image_path=args.image_path)
        else:
            main(args.dataset_dir, args.epochs, args.batch_size, args.img_size, train_mode=True)


Found 12813 images belonging to 10 classes.
Found 3198 images belonging to 10 classes.
Found 12813 training images and 3198 validation images.
Classes found: {'Tomato_Bacterial_spot': 0, 'Tomato_Early_blight': 1, 'Tomato_Late_blight': 2, 'Tomato_Leaf_Mold': 3, 'Tomato_Septoria_leaf_spot': 4, 'Tomato_Spider_mites_Two_spotted_spider_mite': 5, 'Tomato__Target_Spot': 6, 'Tomato__Tomato_YellowLeaf__Curl_Virus': 7, 'Tomato__Tomato_mosaic_virus': 8, 'Tomato_healthy': 9}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.5724 - loss: 1.5991
Epoch 1: val_accuracy improved from None to 0.13835, saving model to tomato_disease_cnn_5.h5




[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m738s[0m 7s/step - accuracy: 0.6825 - loss: 1.0605 - val_accuracy: 0.1383 - val_loss: 6.2416 - learning_rate: 0.0010
Epoch 2/5
[1m  1/100[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11:36[0m 7s/step - accuracy: 0.8047 - loss: 0.5426




Epoch 2: val_accuracy did not improve from 0.13835
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 317ms/step - accuracy: 0.8047 - loss: 0.5426 - val_accuracy: 0.1383 - val_loss: 6.4030 - learning_rate: 0.0010
Epoch 3/5
[1m 30/100[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m8:14[0m 7s/step - accuracy: 0.7977 - loss: 0.6120

KeyboardInterrupt: 

In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import numpy as np

# -------------------
# Config
# -------------------
dataset_dir = "/home/divyansh/Music/archive/PlantVillage"
img_height, img_width = 256, 256
batch_size = 32
validation_split = 0.2
epochs = 10
lr = 0.001

# -------------------
# Data Generators
# -------------------
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    validation_split=validation_split
)

val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_split
)

train_generator = train_datagen.flow_from_directory(
    dataset_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

validation_generator = val_datagen.flow_from_directory(
    dataset_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# -------------------
# Model Architecture
# -------------------
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2,2),
    
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(train_generator.num_classes, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# -------------------
# Callbacks
# -------------------
checkpoint = ModelCheckpoint(
    "tomato_disease_cnn.keras",
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)
earlystop = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1)

# -------------------
# Training
# -------------------
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    callbacks=[checkpoint, earlystop, reduce_lr],
    verbose=1
)

# -------------------
# Save class mapping for Flask
# -------------------
index_to_class = {v: k for k, v in train_generator.class_indices.items()}
np.save('index_to_class.npy', index_to_class)

print("✅ Training complete. Model and class mapping saved.")

model.save('tomato_disease_cnn.keras')


Found 12813 images belonging to 10 classes.
Found 3198 images belonging to 10 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 902ms/step - accuracy: 0.4131 - loss: 1.6890 - val_accuracy: 0.5882 - val_loss: 1.2246 - learning_rate: 0.0010
Epoch 2/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m388s[0m 968ms/step - accuracy: 0.6014 - loss: 1.1774 - val_accuracy: 0.5566 - val_loss: 1.3657 - learning_rate: 0.0010
Epoch 3/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 950ms/step - accuracy: 0.6436 - loss: 1.0156 - val_accuracy: 0.6629 - val_loss: 1.1432 - learning_rate: 0.0010
Epoch 4/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 957ms/step - accuracy: 0.6880 - loss: 0.8939 - val_accuracy: 0.7849 - val_loss: 0.5695 - learning_rate: 0.0010
Epoch 5/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 912ms/step - accuracy: 0.7102 - loss: 0.8215 - val_accuracy: 0.6792 - val_loss: 0.9334 - learning_rate: 0.0010
Epoch 6/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━

In [1]:
import pickle
from tensorflow import keras

# Load your model
model = keras.models.load_model("tomato_disease_cnn.keras")

# Save it as a pickle file
with open("tomato_disease_cnn.pkl", "wb") as f:
    pickle.dump(model, f)


2025-11-06 10:55:19.125173: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-06 10:55:19.125430: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-06 10:55:19.165459: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-06 10:55:20.218252: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To tur

In [4]:
import tensorflow as tf
model = tf.keras.models.load_model("/home/divyansh/Desktop/tomato_disease_cnn.keras")

# Convert the model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model
with open("tomato_disease_cnn.tflite", "wb") as f:
    f.write(tflite_model)
print("done")

INFO:tensorflow:Assets written to: /tmp/tmpbbtc48a5/assets


INFO:tensorflow:Assets written to: /tmp/tmpbbtc48a5/assets


Saved artifact at '/tmp/tmpbbtc48a5'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name='input_layer_2')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  131383860727440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131380093263056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131381379727184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131381379726992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131381379720848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131381379726416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131380093259984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131380093259024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131380093260944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  131380093260560: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1762408945.600683   53477 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1762408945.600708   53477 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-11-06 11:32:25.601018: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpbbtc48a5
2025-11-06 11:32:25.601546: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-11-06 11:32:25.601567: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpbbtc48a5
I0000 00:00:1762408945.605742   53477 mlir_graph_optimization_pass.cc:437] MLIR V1 optimization pass is not enabled
2025-11-06 11:32:25.606549: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-11-06 11:32:25.711272: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpbbtc48a5
2025-11-06 11:32:25.721036: I tensorflow/cc/saved_model/loader.cc:471] SavedModel 

done
