<a href="https://colab.research.google.com/github/chxbim/FP_ML_TomatoLeafDisease/blob/main/Reprocessing_FP_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the dataset paths used by the
# rest of the notebook live underneath this mount point.
drive.mount(mountpoint='/content/MyDrive')

Mounted at /content/MyDrive


In [3]:
import os
import cv2
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score, precision_score,
    recall_score, f1_score, confusion_matrix
)
from xgboost import XGBClassifier

In [4]:
# Locations of the training and validation splits on the mounted Drive.
# Drive is mounted at /content/MyDrive in the first cell, so the second
# "MyDrive" path segment is a folder inside that mount point (presumably the
# Drive root as exposed by Colab -- confirm if the mount point ever changes).
TRAIN_DIR = "/content/MyDrive/MyDrive/dataset FP ML/train"
VAL_DIR   = "/content/MyDrive/MyDrive/dataset FP ML/val"

In [5]:
def extract_features_from_dir(base_dir, img_size=(128,128)):
    """Build a colour-statistics feature matrix from a class-per-folder image tree.

    Every immediate sub-directory of ``base_dir`` is treated as one class, and
    its name becomes the label of the files inside it. Each readable image is
    resized to ``img_size`` and summarised by six numbers: the per-channel mean
    followed by the per-channel standard deviation.

    Directory and file listings are sorted so the row order of the result is
    reproducible across runs and filesystems (``os.listdir`` returns entries
    in arbitrary order).

    Args:
        base_dir: Directory whose sub-directories each hold one class's images.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        A tuple ``(x, y)``. ``x`` is a float array with one row of six
        features per image and ``y`` holds the matching class-folder names.
        When no image could be read, ``x`` has shape ``(0, 6)`` and ``y`` has
        shape ``(0,)``.
    """
    n_features = 6  # 3 channel means + 3 channel standard deviations
    features = []
    labels = []

    for class_name in sorted(os.listdir(base_dir)):
        class_path = os.path.join(base_dir, class_name)
        # Stray files next to the class folders are not classes.
        if not os.path.isdir(class_path):
            continue

        for img_name in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(class_path, img_name))
            # cv2.imread returns None for anything it cannot decode.
            if img is None:
                continue

            img = cv2.resize(img, img_size)

            # Features: per-channel mean and standard deviation. OpenCV loads
            # colour images in B, G, R channel order, so the values are
            # ordered [mean_B, mean_G, mean_R, std_B, std_G, std_R].
            channel_mean = img.mean(axis=(0, 1))
            channel_std = img.std(axis=(0, 1))

            features.append(np.concatenate([channel_mean, channel_std]))
            labels.append(class_name)

    if not features:
        # Keep the feature matrix two-dimensional even when nothing was read.
        return np.empty((0, n_features)), np.array([], dtype=str)

    return np.array(features), np.array(labels)

In [6]:
# Extract features for both splits, training first and validation second.
(x_train, y_train), (x_val, y_val) = (
    extract_features_from_dir(split_dir) for split_dir in (TRAIN_DIR, VAL_DIR)
)

# Sanity check: both feature matrices should have the same number of columns.
print(x_train.shape, x_val.shape)

(10000, 6) (1000, 6)


In [7]:
# Learn the class-name -> integer mapping from the training labels only, then
# apply that same mapping to both splits.
encoder = LabelEncoder().fit(y_train)

y_train_enc = encoder.transform(y_train)
y_val_enc   = encoder.transform(y_val)

In [8]:
# Output folders: encoded labels/metrics go to artifacts/, fitted models to models/.
for _out_dir in ("artifacts", "models"):
    os.makedirs(_out_dir, exist_ok=True)

# Persist the integer-encoded labels of both splits.
for _path, _encoded in (
    ("artifacts/y_train.npy", y_train_enc),
    ("artifacts/y_val.npy", y_val_enc),
):
    np.save(_path, _encoded)

# Store the class names in encoder order so the integer labels can be decoded later.
with open("artifacts/class_names.json", "w") as f:
    f.write(json.dumps(list(encoder.classes_)))

In [9]:
# Depth-limited decision tree with a fixed seed, fitted on the training features.
dt_model = DecisionTreeClassifier(random_state=42, max_depth=10).fit(
    x_train, y_train_enc
)

# Predicted (encoded) labels for the validation split.
y_pred_dt = dt_model.predict(x_val)

In [12]:
# Score the decision tree on the validation split. Precision, recall and F1
# are computed with average="weighted".
metrics_dt = {
    "accuracy": accuracy_score(y_val_enc, y_pred_dt),
    "precision": precision_score(y_val_enc, y_pred_dt, average="weighted"),
    "recall": recall_score(y_val_enc, y_pred_dt, average="weighted"),
    "f1_score": f1_score(y_val_enc, y_pred_dt, average="weighted")
}

cm_dt = confusion_matrix(y_val_enc, y_pred_dt)

# Persist the scalar metrics and the confusion matrix.
with open("artifacts/metrics_dt.json", "w") as f:
    json.dump(metrics_dt, f, indent=4)

np.save("artifacts/confusion_dt.npy", cm_dt)

# Write the fitted model inside a context manager so the file is flushed and
# closed deterministically instead of relying on garbage collection.
with open("models/decision_tree.pkl", "wb") as model_file:
    pickle.dump(dt_model, model_file)

In [13]:
# Gradient-boosted trees with a fixed seed, fitted on the training features.
xgb_model = XGBClassifier(
    random_state=42,
    eval_metric="mlogloss",
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
).fit(x_train, y_train_enc)

# Predicted (encoded) labels for the validation split.
y_pred_xgb = xgb_model.predict(x_val)

In [15]:
# Score the XGBoost model on the validation split. Precision, recall and F1
# are computed with average="weighted".
metrics_xgb = {
    "accuracy": accuracy_score(y_val_enc, y_pred_xgb),
    "precision": precision_score(y_val_enc, y_pred_xgb, average="weighted"),
    "recall": recall_score(y_val_enc, y_pred_xgb, average="weighted"),
    "f1_score": f1_score(y_val_enc, y_pred_xgb, average="weighted")
}

cm_xgb = confusion_matrix(y_val_enc, y_pred_xgb)

# Persist the scalar metrics and the confusion matrix.
with open("artifacts/metrics_xgb.json", "w") as f:
    json.dump(metrics_xgb, f, indent=4)

np.save("artifacts/confusion_xgb.npy", cm_xgb)

# Write the fitted model inside a context manager so the file is flushed and
# closed deterministically instead of relying on garbage collection.
with open("models/xgboost.pkl", "wb") as model_file:
    pickle.dump(xgb_model, model_file)