### Feature Extraction

- Input dari `data/processed/segmented`
- Output fitur disimpan di `data/processed/features`

Proses ekstraksi fitur:

Segmented Image + Mask   
→ ROI di-grayscale & dinormalisasi (0–255)  
→ Hitung GLCM (d = 1,3,5; θ = 0°, 45°, 90°, 135°)  
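→ Ambil nilai **Contrast, Dissimilarity, Homogeneity, Energy, ASM, Correlation** (mean & std dari semua kombinasi jarak/sudut), ditambah **Entropy, Cluster Shade, Cluster Prominence** dari GLCM rata-rata  
→ Hitung **mean**, **std**, & **var** intensitas piksel di area daun  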
→ Gabungkan jadi **feature vector** per gambar  
→ Simpan ke file di `data/processed/features`

In [12]:
from pathlib import Path
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops

# Base paths. ROOT is the parent of the current working directory
# (presumably the notebook runs from a sub-folder of the project - confirm).
ROOT = Path.cwd().parent

# Input folder with segmented images and output folder for feature tables.
SEG_DIR = ROOT.joinpath("data", "processed", "segmented")
OUT_DIR = ROOT.joinpath("data", "processed", "features")

# Create the output folder (and any missing parents); no error if it exists.
OUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"SEG_DIR: {SEG_DIR}")
print(f"OUT_DIR: {OUT_DIR}")

# Class names; each one is used as a sub-folder name under SEG_DIR and as
# the label value of the extracted rows.
categories = [
    "bacterial_leaf_blight",
    "brown_spot",
    "healthy",
    "leaf_blast",
    "leaf_scald",
    "narrow_brown_spot",
]

SEG_DIR: d:\Bismillah Kuliah\Semester 3\TPSC\New folder\rice-leaf-disease-ml\data\processed\segmented
OUT_DIR: d:\Bismillah Kuliah\Semester 3\TPSC\New folder\rice-leaf-disease-ml\data\processed\features


In [6]:
def load_gray_and_mask_from_segmented(seg_path: Path):
    """Load a segmented image and return its grayscale version plus a leaf mask.

    The segmented image is expected to show the leaf in colour on a black
    background, so every non-zero grayscale pixel is treated as leaf.

    Args:
        seg_path: Path of the segmented image file.

    Returns:
        A ``(gray, mask)`` tuple: ``gray`` is the BGR image converted to
        grayscale, ``mask`` is a ``uint8`` array of the same shape holding
        1 where ``gray > 0`` and 0 elsewhere.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``seg_path``.
    """
    image_bgr = cv2.imread(str(seg_path))
    if image_bgr is None:
        raise ValueError(f"Gambar tidak bisa dibaca: {seg_path}")

    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)

    # 1 = leaf, 0 = background
    is_leaf = gray > 0
    return gray, is_leaf.astype("uint8")

In [16]:
import numpy as np
from skimage.feature import graycomatrix, graycoprops

def extract_features(gray: np.ndarray, mask: np.ndarray | None = None,
                     distances=[1, 3, 5],
                     angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
                     levels: int = 256):

    # fokus area daun
    if mask is not None:
        roi = gray.copy()
        roi[mask == 0] = 0
        values = gray[mask > 0]
        if values.size == 0:
            values = gray.flatten()
    else:
        roi = gray
        values = gray.flatten()

    # normalisasi ke 0–255 untuk GLCM
    roi_norm = cv2.normalize(roi, None, 0, 255, cv2.NORM_MINMAX).astype("uint8")

    # glcm
    glcm = graycomatrix(
        roi_norm,
        distances=distances,
        angles=angles,
        levels=levels,
        symmetric=True,
        normed=True,
    )

    feats = {}

    # properti built-in dari skimage (mean & std)
    glcm_props = ["contrast", "dissimilarity", "homogeneity",
                  "energy", "ASM", "correlation"]

    for prop in glcm_props:
        vals = graycoprops(glcm, prop)              # shape: (len(dist), len(angle))
        feats[f"{prop}_mean"] = float(vals.mean())
        feats[f"{prop}_std"]  = float(vals.std())

    # entropy + cluster features (pakai GLCM rata-rata)
    # rata-rata GLCM di semua jarak & sudut
    glcm_mean = glcm.mean(axis=(2, 3))              # shape: (levels, levels)
    p = glcm_mean.astype(np.float64)
    p /= p.sum() + 1e-12                            # normalisasi

    # entropy
    p_nonzero = p[p > 0]
    entropy = -np.sum(p_nonzero * np.log2(p_nonzero))
    feats["entropy"] = float(entropy)

    # index i,j
    i_idx, j_idx = np.indices((levels, levels))

    mu_x = np.sum(i_idx * p)
    mu_y = np.sum(j_idx * p)

    # cluster shade & prominence
    s = (i_idx + j_idx - mu_x - mu_y)
    feats["cluster_shade"]      = float(np.sum((s ** 3) * p))
    feats["cluster_prominence"] = float(np.sum((s ** 4) * p))

    # statistik intensitas piksel di area daun
    values = values.astype(np.float64)
    feats["intensity_mean"] = float(values.mean())
    feats["intensity_std"]  = float(values.std())
    feats["intensity_var"]  = float(values.var())

    return feats


In [17]:
# Build one feature row per segmented image, class by class.
rows = []

for cls in categories:
    cls_dir = SEG_DIR / cls
    print(f"Proses kelas: {cls}")

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the feature table reproducible across runs and platforms.
    for fname in sorted(os.listdir(cls_dir)):
        # Only image files are processed; anything else is skipped.
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        seg_path = cls_dir / fname

        gray, mask = load_gray_and_mask_from_segmented(seg_path)
        feats = extract_features(gray, mask)

        feats["label"] = cls                 # multi-class
        feats["filename"] = fname

        rows.append(feats)

features_df = pd.DataFrame(rows)
print(features_df.head())
print("Shape fitur:", features_df.shape)


Proses kelas: bacterial_leaf_blight
Proses kelas: brown_spot
Proses kelas: healthy
Proses kelas: leaf_blast
Proses kelas: leaf_scald
Proses kelas: narrow_brown_spot
   contrast_mean  contrast_std  dissimilarity_mean  dissimilarity_std  \
0     192.332773    160.534113            5.177595           2.878678   
1     160.716867    127.354181            1.887305           1.208933   
2     136.917713    106.136375            3.474702           1.786967   
3     149.157389    118.732150            3.022030           1.693803   
4     150.107731    101.642898            1.953991           1.071589   

   homogeneity_mean  homogeneity_std  energy_mean  energy_std  ASM_mean  \
0          0.549261         0.046668     0.457731    0.001552  0.209520   
1          0.881796         0.015744     0.855390    0.001064  0.731693   
2          0.573344         0.055062     0.439615    0.001641  0.193264   
3          0.761300         0.026582     0.708265    0.001072  0.501641   
4          0.829012  

In [19]:
# Store the feature table twice under OUT_DIR: as CSV (without the index
# column) and as a pickle of the DataFrame.
csv_path = OUT_DIR.joinpath("train_features.csv")
pkl_path = OUT_DIR.joinpath("train_features.pkl")

features_df.to_csv(csv_path, index=False)
features_df.to_pickle(pkl_path)

# Report where the files were written.
print("Fitur disimpan ke:", f"- {csv_path}", f"- {pkl_path}", sep="\n")

Fitur disimpan ke:
- d:\Bismillah Kuliah\Semester 3\TPSC\New folder\rice-leaf-disease-ml\data\processed\features\train_features.csv
- d:\Bismillah Kuliah\Semester 3\TPSC\New folder\rice-leaf-disease-ml\data\processed\features\train_features.pkl
