In [1]:
# type: ignore
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [2]:
# Locations of the pre-split dataset (training and validation folders).
train_path, val_path = 'tomato_dataset/train', 'tomato_dataset/val'
# Target size handed to load_img when each image is read.
img_size = (64, 64)

In [3]:
from PIL import UnidentifiedImageError

def load_images_from_folder(folder_path, target_size=None):
    """Load every readable image under ``folder_path`` as a flat feature row.

    ``folder_path`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label of each image inside it.
    Entries of ``folder_path`` that are not directories are ignored.

    Args:
        folder_path: Directory whose immediate sub-directories are classes.
        target_size: Size passed to ``load_img`` as ``target_size``. When
            omitted, the notebook-level ``img_size`` is used.

    Returns:
        A ``(images, labels)`` pair of NumPy arrays: one flattened pixel
        vector (values divided by 255) per loaded image, and the class
        name of each of those images, in the same order.

    Files that cannot be loaded are reported on stdout and skipped, so a
    stray non-image file does not abort the whole load.
    """
    if target_size is None:
        target_size = img_size

    images = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and anything order-sensitive downstream) reproducible.
    for class_name in sorted(os.listdir(folder_path)):
        class_folder = os.path.join(folder_path, class_name)
        if not os.path.isdir(class_folder):
            continue
        for image_name in sorted(os.listdir(class_folder)):
            image_path = os.path.join(class_folder, image_name)
            try:
                img = load_img(image_path, target_size=target_size)
                img_array = img_to_array(img) / 255.0
                images.append(img_array.flatten())  # flatten for KNN
                labels.append(class_name)
            except UnidentifiedImageError:
                # The file exists but is not a decodable image.
                print(f"Skipped invalid image: {image_path}")
            except Exception as e:
                # Best effort: report any other failure and keep going.
                print(f"Error loading image {image_path}: {e}")
    return np.array(images), np.array(labels)

In [4]:
# Read both splits with the same loader: training first, then validation.
(X_train, y_train), (X_val, y_val) = map(
    load_images_from_folder, (train_path, val_path)
)

Skipped invalid image: tomato_dataset/train\Tomato_Septoria_leaf_spot\plantdiseasedetection.ipynb
Skipped invalid image: tomato_dataset/train\Tomato__Tomato_YellowLeaf__Curl_Virus\svn-r6Yb5c
Skipped invalid image: tomato_dataset/val\Tomato_Septoria_leaf_spot\plantdiseasedetection.ipynb
Skipped invalid image: tomato_dataset/val\Tomato__Tomato_YellowLeaf__Curl_Virus\svn-r6Yb5c


In [5]:
# Learn the class-name -> integer mapping from the training labels only,
# then apply that same mapping to both splits.
le = LabelEncoder().fit(y_train)
y_train_enc = le.transform(y_train)
y_val_enc = le.transform(y_val)

In [6]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# 3-nearest-neighbour classifier trained on the flattened pixel vectors.
knn = KNeighborsClassifier(n_neighbors=3)
# Left as the trailing expression so the cell still shows the fitted estimator.
knn.fit(X=X_train, y=y_train_enc)

0,1,2
,n_neighbors,3
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [7]:
# Score the fitted classifier on the validation split.
y_pred = knn.predict(X_val)
acc = accuracy_score(y_val_enc, y_pred)

print("Validation Accuracy:", acc)

# Pin the report rows to the encoder's full class list. Without `labels`,
# classification_report infers the classes from the ids that occur in
# y_val_enc / y_pred and raises ValueError when that count differs from
# len(target_names), e.g. when a class is absent from both.
report = classification_report(
    y_val_enc,
    y_pred,
    labels=le.transform(le.classes_),
    target_names=le.classes_,
)
print("\nClassification Report:\n", report)

Validation Accuracy: 0.6025735974819478

Classification Report:
                                              precision    recall  f1-score   support

                      Tomato_Bacterial_spot       0.34      0.95      0.50      1418
                        Tomato_Early_blight       0.67      0.22      0.34       688
                         Tomato_Late_blight       0.61      0.63      0.62      1281
                           Tomato_Leaf_Mold       0.64      0.72      0.68       648
                  Tomato_Septoria_leaf_spot       0.66      0.64      0.65      1191
Tomato_Spider_mites_Two_spotted_spider_mite       0.86      0.38      0.52      1121
                        Tomato__Target_Spot       0.83      0.17      0.28       938
      Tomato__Tomato_YellowLeaf__Curl_Virus       0.91      0.81      0.85      2187
                Tomato__Tomato_mosaic_virus       1.00      0.08      0.16       249
                             Tomato_healthy       0.84      0.56      0.68      1081

In [8]:
#save model

import pickle

# open(..., 'wb') does not create missing parent directories, so make sure
# the output folder exists first; otherwise saving fails with
# FileNotFoundError whenever 'models/' has not been created yet.
os.makedirs('models', exist_ok=True)

# Persist the fitted classifier.
with open('models/knn_tomato.pkl', 'wb') as f:
    pickle.dump(knn, f)

# Persist the label encoder alongside it so predicted ids can be mapped
# back to class names when the model is loaded later.
with open('models/label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)