In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split



In [2]:
import kagglehub

path = kagglehub.dataset_download("imbikramsaha/paddy-doctor")
print("Dataset path:", path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/imbikramsaha/paddy-doctor?dataset_version_number=1...


100%|██████████| 1.02G/1.02G [00:25<00:00, 42.7MB/s]

Extracting files...





Dataset path: /root/.cache/kagglehub/datasets/imbikramsaha/paddy-doctor/versions/1


In [3]:
!ls /root/.cache/kagglehub/datasets/imbikramsaha/paddy-doctor/versions/1/paddy-disease-classification


sample_submission.csv  test_images  train.csv  train_images


In [4]:
dataset_root_path = os.path.join(path, 'paddy-disease-classification')
train_csv_path = os.path.join(dataset_root_path, 'train.csv')
train_images_path = os.path.join(dataset_root_path, 'train_images')


In [5]:
full_df = pd.read_csv(train_csv_path)


In [6]:
print(full_df.columns)


Index(['image_id', 'label', 'variety', 'age'], dtype='object')


In [7]:
labels = sorted(full_df["label"].unique())
print(labels)


['bacterial_leaf_blight', 'bacterial_leaf_streak', 'bacterial_panicle_blight', 'blast', 'brown_spot', 'dead_heart', 'downy_mildew', 'hispa', 'normal', 'tungro']


In [8]:
import os

dataset_path = "/kaggle/input/paddy-doctor"

for root, dirs, files in os.walk(dataset_path):
    level = root.replace(dataset_path, '').count(os.sep)
    indent = ' ' * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 4 * (level + 1)
    for f in files[:5]:  # only first 5 files
        print(f"{subindent}{f}")


In [9]:
train_df, val_df = train_test_split(
    full_df,
    test_size=0.1,
    stratify=full_df['label'],
    random_state=42
)

print(len(train_df), len(val_df))


9366 1041


In [10]:
import numpy as np

label_counts = train_df['label'].value_counts()
label_order = sorted(train_df['label'].unique())

counts = np.array([label_counts[label] for label in label_order], dtype=float)
class_weights = 1.0 / counts
class_weights = class_weights / class_weights.sum()

# Keras expects a dictionary: {class_index: weight}
class_index_mapping = {label: idx for idx, label in enumerate(label_order)}
class_weights_dict = {class_index_mapping[label]: weight for label, weight in zip(label_order, class_weights)}

print(class_weights_dict)


{0: np.float64(0.1521780416341574), 1: np.float64(0.19177992966175975), 2: np.float64(0.21646447506376845), 3: np.float64(0.04193653193370962), 4: np.float64(0.07556305984368876), 5: np.float64(0.05053061320826028), 6: np.float64(0.11754253753462696), 7: np.float64(0.045706436198133686), 8: np.float64(0.041302730443527606), 9: np.float64(0.06699564447836756)}


In [11]:
import tensorflow as tf
import numpy as np

IMG_SIZE = 224
BATCH_SIZE = 32
DATA_DIR = "/root/.cache/kagglehub/datasets/imbikramsaha/paddy-doctor/versions/1/paddy-disease-classification/train_images"
EPOCHS = 10


In [22]:
train_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    image_size=(224,224),
    batch_size=32,
    label_mode="int",
    validation_split=0.1,
    subset="training",
    seed=42
)

val_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    image_size=(224,224),
    batch_size=32,
    label_mode="int",
    validation_split=0.1,
    subset="validation",
    seed=42
)

class_names = train_dataset.class_names

# ✅ Print the class names in index order
print("Class names in dataset (index order):")
print(class_names)


Found 10407 files belonging to 10 classes.
Using 9367 files for training.
Found 10407 files belonging to 10 classes.
Using 1040 files for validation.
Class names in dataset (index order):
['bacterial_leaf_blight', 'bacterial_leaf_streak', 'bacterial_panicle_blight', 'blast', 'brown_spot', 'dead_heart', 'downy_mildew', 'hispa', 'normal', 'tungro']


In [21]:
# Print the class names in index order
print("Class names in model index order:")
print(train_dataset.class_names)


Class names in model index order:


AttributeError: '_ParallelMapDataset' object has no attribute 'class_names'

In [13]:
from tensorflow.keras.applications.efficientnet import preprocess_input

train_dataset = train_dataset.map(
    lambda x, y: (preprocess_input(x), y),
    num_parallel_calls=tf.data.AUTOTUNE
)

val_dataset = val_dataset.map(
    lambda x, y: (preprocess_input(x), y),
    num_parallel_calls=tf.data.AUTOTUNE
)


In [14]:
import numpy as np

label_counts = {i: 0 for i in range(len(class_names))}

for _, labels in train_dataset.unbatch():
    label_counts[int(labels.numpy())] += 1

class_weights_dict = {
    i: max(label_counts.values()) / count
    for i, count in label_counts.items()
}

print(class_weights_dict)


{0: 3.6492027334851938, 1: 4.711764705882353, 2: 5.322259136212624, 3: 1.0203821656050955, 4: 1.8287671232876712, 5: 1.237065637065637, 6: 2.907441016333938, 7: 1.132155477031802, 8: 1.0, 9: 1.638036809815951}


In [15]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

num_classes = len(class_names)

base_model = EfficientNetB0(
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    weights='imagenet'
)

# ----------------------------
# Step 1: Train head first
# ----------------------------
base_model.trainable = False  # freeze backbone

inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs, outputs)

# Compile for head training
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train head first
history_head = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)

# ----------------------------
# Step 2: Fine-tune top layers
# ----------------------------
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history_fine = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 196ms/step - accuracy: 0.3790 - loss: 1.8031 - val_accuracy: 0.5712 - val_loss: 1.2669
Epoch 2/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 76ms/step - accuracy: 0.5632 - loss: 1.2814 - val_accuracy: 0.6567 - val_loss: 1.0695
Epoch 3/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 80ms/step - accuracy: 0.6211 - loss: 1.1384 - val_accuracy: 0.6615 - val_loss: 0.9923
Epoch 4/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 81ms/step - accuracy: 0.6581 - loss: 1.0493 - val_accuracy: 0.6990 - val_loss: 0.9214
Epoch 5/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 87ms/step - accuracy: 0.6636 - loss: 1.0004 - val_accuracy: 0.6981 - val_loss: 0.8789
Epo

In [16]:
import tensorflow as tf

# 1️⃣ Save the trained Keras model (optional)
model.save("pest_disease_detection_model.h5")  # HDF5 backup

# 2️⃣ Convert to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Optional optimization
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = converter.convert()

# 3️⃣ Save as TFLite
with open("pest_disease_detection_model.tflite", "wb") as f:
    f.write(tflite_model)

print("TFLite model saved as pest_disease_detection_model.tflite")




Saved artifact at '/tmp/tmpc1kxzecs'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_238')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  135642602129744: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  135642602131664: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  135642593165584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593585552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593433104: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593434832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593176720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593621776: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593611024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135642593623312: TensorSpec(shape=(), dtype=tf.resource, name

In [18]:
import tensorflow as tf
import numpy as np

# 1️⃣ Load the TFLite model
tflite_model_path = "/content/pest_disease_detection_model.tflite"
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# 2️⃣ Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_shape = input_details[0]['shape']
print(f"Model input shape: {input_shape}")

# 3️⃣ Function to predict a single image
def tflite_predict(image):
    # Resize and normalize image
    img = tf.image.resize(image, (input_shape[1], input_shape[2]))
    img = tf.expand_dims(img, axis=0)  # batch dimension
    img = tf.cast(img, tf.float32) / 255.0  # normalization

    interpreter.set_tensor(input_details[0]['index'], img.numpy())
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    return np.argmax(output, axis=1)[0]

# Example usage:
# pred_class = tflite_predict(some_image_tensor)
# print("Predicted class index:", pred_class)


Model input shape: [  1 224 224   3]


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [23]:
from tensorflow.keras.applications.efficientnet import preprocess_input

def tflite_predict(image):
    # Resize
    img = tf.image.resize(image, (input_shape[1], input_shape[2]))
    img = tf.expand_dims(img, axis=0)  # batch dimension
    img = tf.cast(img, tf.float32)

    # Use EfficientNet preprocessing
    img = preprocess_input(img)

    interpreter.set_tensor(input_details[0]['index'], img.numpy())
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    return np.argmax(output, axis=1)[0]


In [24]:
import tensorflow as tf

# Path to your image
img_path = "/content/100004.jpg"

# Load image
img = tf.keras.preprocessing.image.load_img(img_path)
img_tensor = tf.keras.preprocessing.image.img_to_array(img)

# Run TFLite model inference
pred_idx = tflite_predict(img_tensor)

print(f"Predicted class index: {pred_idx}")


Predicted class index: 3


In [25]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.efficientnet import preprocess_input
from PIL import Image

# ----- 1️⃣ Load TFLite model -----
tflite_model_path = "/content/pest_disease_detection_model.tflite"
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']

# ----- 2️⃣ Define class names in the correct order -----
class_names = [
    'bacterial_leaf_blight',
    'bacterial_leaf_streak',
    'bacterial_panicle_blight',
    'blast',
    'brown_spot',
    'dead_heart',
    'downy_mildew',
    'hispa',
    'normal',
    'tungro'
]

# ----- 3️⃣ Function to run inference and get raw outputs -----
def tflite_predict_raw(image_path):
    # Load image
    img = Image.open(image_path).convert('RGB')
    img = img.resize((input_shape[2], input_shape[1]))  # resize to model input
    img = np.array(img, dtype=np.float32)

    # Preprocess like EfficientNet
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)  # add batch dimension

    # Set tensor and invoke
    interpreter.set_tensor(input_details[0]['index'], img)
    interpreter.invoke()

    # Get raw predictions
    output = interpreter.get_tensor(output_details[0]['index'])[0]  # remove batch dim
    return output

# ----- 4️⃣ Run inference -----
image_path = "/content/100008.jpg"
raw_preds = tflite_predict_raw(image_path)

# Print raw outputs
print("Raw output probabilities/scores:")
for cls_name, score in zip(class_names, raw_preds):
    print(f"{cls_name}: {score:.4f}")

# Optional: predicted class
pred_idx = np.argmax(raw_preds)
print(f"\nPredicted class: {class_names[pred_idx]} (index {pred_idx})")


Raw output probabilities/scores:
bacterial_leaf_blight: 0.0079
bacterial_leaf_streak: 0.0003
bacterial_panicle_blight: 0.0016
blast: 0.0007
brown_spot: 0.0016
dead_heart: 0.9217
downy_mildew: 0.0004
hispa: 0.0042
normal: 0.0323
tungro: 0.0292

Predicted class: dead_heart (index 5)


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
