<a href="https://colab.research.google.com/github/aneeq-shaffy/DL-labsheets/blob/main/Pest_solution_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split



In [2]:
import kagglehub

path = kagglehub.dataset_download("imbikramsaha/paddy-doctor")
print("Dataset path:", path)


Using Colab cache for faster access to the 'paddy-doctor' dataset.
Dataset path: /kaggle/input/paddy-doctor


In [3]:
dataset_root_path = os.path.join(path, 'paddy-disease-classification')
train_csv_path = os.path.join(dataset_root_path, 'train.csv')
train_images_path = os.path.join(dataset_root_path, 'train_images')


In [4]:
full_df = pd.read_csv(train_csv_path)


In [5]:
print(full_df.columns)


Index(['image_id', 'label', 'variety', 'age'], dtype='object')


In [6]:
labels = sorted(full_df["label"].unique())
print(labels)


['bacterial_leaf_blight', 'bacterial_leaf_streak', 'bacterial_panicle_blight', 'blast', 'brown_spot', 'dead_heart', 'downy_mildew', 'hispa', 'normal', 'tungro']


In [7]:
import os

dataset_path = "/kaggle/input/paddy-doctor"

for root, dirs, files in os.walk(dataset_path):
    level = root.replace(dataset_path, '').count(os.sep)
    indent = ' ' * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 4 * (level + 1)
    for f in files[:5]:  # only first 5 files
        print(f"{subindent}{f}")


paddy-doctor/
    paddy-disease-classification/
        sample_submission.csv
        .jovianrc
        train.csv
        train_images/
            tungro/
                109629.jpg
                104765.jpg
                109706.jpg
                100098.jpg
                102734.jpg
            hispa/
                100502.jpg
                107167.jpg
                106262.jpg
                102866.jpg
                104880.jpg
            downy_mildew/
                101119.jpg
                105381.jpg
                110270.jpg
                110143.jpg
                105940.jpg
            bacterial_leaf_streak/
                103494.jpg
                106019.jpg
                103874.jpg
                103325.jpg
                109858.jpg
            bacterial_leaf_blight/
                109940.jpg
                105979.jpg
                104324.jpg
                109428.jpg
                106615.jpg
            brown_spot/
                104821.jpg
   

In [8]:
train_df, val_df = train_test_split(
    full_df,
    test_size=0.1,
    stratify=full_df['label'],
    random_state=42
)

print(len(train_df), len(val_df))


9366 1041


In [9]:
import numpy as np

label_counts = train_df['label'].value_counts()
label_order = sorted(train_df['label'].unique())

counts = np.array([label_counts[label] for label in label_order], dtype=float)
class_weights = 1.0 / counts
class_weights = class_weights / class_weights.sum()

# Keras expects a dictionary: {class_index: weight}
class_index_mapping = {label: idx for idx, label in enumerate(label_order)}
class_weights_dict = {class_index_mapping[label]: weight for label, weight in zip(label_order, class_weights)}

print(class_weights_dict)


{0: np.float64(0.1521780416341574), 1: np.float64(0.19177992966175975), 2: np.float64(0.21646447506376845), 3: np.float64(0.04193653193370962), 4: np.float64(0.07556305984368876), 5: np.float64(0.05053061320826028), 6: np.float64(0.11754253753462696), 7: np.float64(0.045706436198133686), 8: np.float64(0.041302730443527606), 9: np.float64(0.06699564447836756)}


In [10]:
import tensorflow as tf
import numpy as np

IMG_SIZE = 224
BATCH_SIZE = 32
DATA_DIR = "/kaggle/input/paddy-doctor/paddy-disease-classification/train_images"
EPOCHS = 10


In [11]:
train_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    image_size=(224,224),
    batch_size=32,
    label_mode="int",          # üî• CHANGE THIS
    validation_split=0.1,
    subset="training",
    seed=42
)

val_dataset = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    image_size=(224,224),
    batch_size=32,
    label_mode="int",          # üî• CHANGE THIS
    validation_split=0.1,
    subset="validation",
    seed=42
)

class_names = train_dataset.class_names


Found 10407 files belonging to 10 classes.
Using 9367 files for training.
Found 10407 files belonging to 10 classes.
Using 1040 files for validation.


In [12]:
from tensorflow.keras.applications.efficientnet import preprocess_input

train_dataset = train_dataset.map(
    lambda x, y: (preprocess_input(x), y),
    num_parallel_calls=tf.data.AUTOTUNE
)

val_dataset = val_dataset.map(
    lambda x, y: (preprocess_input(x), y),
    num_parallel_calls=tf.data.AUTOTUNE
)


In [13]:
import numpy as np

label_counts = {i: 0 for i in range(len(class_names))}

for _, labels in train_dataset.unbatch():
    label_counts[int(labels.numpy())] += 1

class_weights_dict = {
    i: max(label_counts.values()) / count
    for i, count in label_counts.items()
}

print(class_weights_dict)


{0: 3.6492027334851938, 1: 4.711764705882353, 2: 5.322259136212624, 3: 1.0203821656050955, 4: 1.8287671232876712, 5: 1.237065637065637, 6: 2.907441016333938, 7: 1.132155477031802, 8: 1.0, 9: 1.638036809815951}


In [19]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

num_classes = len(class_names)

base_model = EfficientNetB0(
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    weights='imagenet'
)

# ----------------------------
# Step 1: Train head first
# ----------------------------
base_model.trainable = False  # freeze backbone

inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs, outputs)

# Compile for head training
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train head first
history_head = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)

# ----------------------------
# Step 2: Fine-tune top layers
# ----------------------------
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history_fine = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)


Epoch 1/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m65s[0m 156ms/step - accuracy: 0.3545 - loss: 1.8415 - val_accuracy: 0.5856 - val_loss: 1.2576
Epoch 2/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 87ms/step - accuracy: 0.5678 - loss: 1.2821 - val_accuracy: 0.6442 - val_loss: 1.0986
Epoch 3/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 86ms/step - accuracy: 0.6140 - loss: 1.1392 - val_accuracy: 0.6731 - val_loss: 0.9855
Epoch 4/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 87ms/step - accuracy: 0.6541 - loss: 1.0398 - val_accuracy: 0.6904 - val_loss: 0.9328
Epoch 5/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 86ms/step - accuracy: 0.6701 - loss: 0.9913 - val_accuracy: 0.7240 - v

In [18]:
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)


Epoch 1/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m74s[0m 171ms/step - accuracy: 0.3960 - loss: 1.8135 - val_accuracy: 0.6952 - val_loss: 0.9498
Epoch 2/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m45s[0m 89ms/step - accuracy: 0.6944 - loss: 0.9584 - val_accuracy: 0.7885 - val_loss: 0.6417
Epoch 3/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m26s[0m 87ms/step - accuracy: 0.7927 - loss: 0.6719 - val_accuracy: 0.8471 - val_loss: 0.4678
Epoch 4/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m27s[0m 91ms/step - accuracy: 0.8492 - loss: 0.5024 - val_accuracy: 0.8837 - val_loss: 0.3709
Epoch 5/5
[1m293/293[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 87ms/step - accuracy: 0.8818 - loss: 0.3947 - val_accuracy: 0.9000 - v

In [20]:
import tensorflow as tf

# 1Ô∏è‚É£ Save the trained Keras model (optional)
model.save("pest_disease_detection_model.h5")  # HDF5 backup

# 2Ô∏è‚É£ Convert to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Optional optimization
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = converter.convert()

# 3Ô∏è‚É£ Save as TFLite
with open("pest_disease_detection_model.tflite", "wb") as f:
    f.write(tflite_model)

print("TFLite model saved as pest_disease_detection_model.tflite")




Saved artifact at '/tmp/tmpsgc8cpar'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_967')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  139736524710608: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  139736524702160: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  139734011798416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471904976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471904016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139734011804368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471905936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471904208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471904592: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139733471905744: TensorSpec(shape=(), dtype=tf.resource, name

In [21]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tntiphan/paddy-rice-disease-classification")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/tntiphan/paddy-rice-disease-classification?dataset_version_number=7...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4.05G/4.05G [01:02<00:00, 69.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/tntiphan/paddy-rice-disease-classification/versions/7


In [22]:
import tensorflow as tf
import numpy as np

# 1Ô∏è‚É£ Load the TFLite model
tflite_model_path = "/content/pest_disease_detection_model.tflite"
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# 2Ô∏è‚É£ Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_shape = input_details[0]['shape']
print(f"Model input shape: {input_shape}")

# 3Ô∏è‚É£ Function to predict a single image
def tflite_predict(image):
    # Resize and normalize image
    img = tf.image.resize(image, (input_shape[1], input_shape[2]))
    img = tf.expand_dims(img, axis=0)  # batch dimension
    img = tf.cast(img, tf.float32) / 255.0  # normalization

    interpreter.set_tensor(input_details[0]['index'], img.numpy())
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    return np.argmax(output, axis=1)[0]

# Example usage:
# pred_class = tflite_predict(some_image_tensor)
# print("Predicted class index:", pred_class)


Model input shape: [  1 224 224   3]


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [23]:
import os

dataset_path = "/root/.cache/kagglehub/datasets/tntiphan/paddy-rice-disease-classification/versions/7"

for root, dirs, files in os.walk(dataset_path):
    level = root.replace(dataset_path, '').count(os.sep)
    indent = ' ' * 4 * level
    print(f"{indent}{os.path.basename(root)}/")

    subindent = ' ' * 4 * (level + 1)
    # Only show first 5 files
    for f in files[:5]:
        print(f"{subindent}{f}")


7/
    id2label.json
    label2id.json
    metadata.csv
    data/
        brown_spot/
            110147.jpg
            PD106204.jpg
            B100415.jpg
            brown_spot1375.jpg
            brown_spot (217).JPG
        bacterial_leaf_blight/
            101933.jpg
            blight-_0_5101.jpg
            blight-_0_9885.jpg
            blight-_0_5760.jpg
            BACTERAILBLIGHT3_081.JPG
        healthy/
            H_73.jpg
            104284.jpg
            102316.jpg
            104254.jpg
            104805.jpg
        leaf_blast/
            leaf_blast170.jpg
            leaf_blast706.jpg
            H100712.jpg
            IMG_20190419_133940.jpg
            leaf_blast28.jpg


In [1]:
import os
import pandas as pd
import tensorflow as tf
from tqdm import tqdm  # progress bar

# Dataset directory
data_dir = "/root/.cache/kagglehub/datasets/tntiphan/paddy-rice-disease-classification/versions/7/data"

# Map folder names to class indices
class_names = sorted(os.listdir(data_dir))
class_to_index = {name: idx for idx, name in enumerate(class_names)}

# List to store results
results = []

# Loop through all images in all classes with progress bar
total_images = sum(len(os.listdir(os.path.join(data_dir, cls))) for cls in class_names)

with tqdm(total=total_images, desc="Predicting images") as pbar:
    for class_name, class_idx in class_to_index.items():
        class_folder = os.path.join(data_dir, class_name)
        for img_file in os.listdir(class_folder):
            img_path = os.path.join(class_folder, img_file)
            # Load image
            img = tf.keras.preprocessing.image.load_img(img_path)
            img_tensor = tf.keras.preprocessing.image.img_to_array(img)
            # Predict using TFLite
            pred = tflite_predict(img_tensor)
            # Save results
            results.append({
                "image_file": img_file,
                "true_label": class_name,
                "true_label_idx": class_idx,
                "predicted_label_idx": pred,
                "predicted_label_name": class_names[pred]
            })
            pbar.update(1)  # update progress bar

# Convert results to DataFrame and save CSV
df_results = pd.DataFrame(results)
csv_path = "/content/paddy_predictions.csv"
df_results.to_csv(csv_path, index=False)
print(f"Predictions saved to {csv_path}")


KeyboardInterrupt: 