# Coffee Bean Classification with **GNN (Superpixel Graph)**
Notebook ini mengubah pendekatan **CNN** menjadi **Graph Neural Network (GNN)** menggunakan **superpixel** sebagai node dan **k-NN** sebagai edge. Library utama: `spektral` (untuk GNN berbasis Keras/TensorFlow) dan `scikit-image` (untuk superpixel SLIC).
    
> **Struktur dataset yang diharapkan**
>
> ```
> Dataset/
> ├── train/
> │   ├── ClassA/
> │   │   ├── img1.jpg
> │   │   └── ...
> │   ├── ClassB/
> │   └── ...
> ├── val/
> └── test/
> ```
>
> Setiap subfolder mewakili **label**.


In [None]:
# (Opsional) Install dependencies - jalankan jika environment belum punya paket ini
# Jika sudah terpasang, Anda bisa melewati cell ini.
# Catatan: Pada beberapa environment offline, perintah ini perlu dijalankan manual.
# !pip -q install spektral scikit-image scikit-learn tensorflow==2.*

In [1]:
# Import libraries
import os
import glob
import random
import numpy as np
import tensorflow as tf

from skimage import io, transform, color
from skimage.segmentation import slic
from sklearn.neighbors import NearestNeighbors

from spektral.data import Dataset, Graph
from spektral.data.loaders import DisjointLoader
from spektral.layers import GCNConv, GlobalSumPool
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

# Reproducibility: feed the same fixed seed to each random number generator
# used in this notebook (Python's `random`, NumPy, TensorFlow), in that order.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)
del _seed_fn  # keep the helper name out of the notebook namespace

print("TensorFlow:", tf.__version__)

ModuleNotFoundError: No module named 'spektral'

## Konfigurasi & Path Dataset

In [None]:
# Dataset locations: change these paths to match your own directory layout.
training_path = "Dataset/train"
# NOTE(review): the folder tree in the notebook introduction names this split
# "val/", while the path below points at "valid" - confirm which one exists.
validation_path = "Dataset/valid"
testing_path = "Dataset/test"

# GNN & preprocessing hyperparameters
IMG_SIZE = 256            # side length the image is resized to before SLIC
N_SEGMENTS = 120          # number of superpixels (nodes) requested per image
COMPACTNESS = 10.0        # SLIC compactness parameter
KNN_K = 8                 # number of neighbours used to create edges
BATCH_SIZE = 8
EPOCHS = 20
LEARNING_RATE = 1e-3

# Node features: [L, a, b, x_norm, y_norm, std_L, std_a, std_b] -> 8 features
FEATURES_DIM = 8

## Utilitas: Listing file & mapping label

In [None]:
def list_files_labels(root_dir):
    """Collect labelled image paths from a one-folder-per-class directory.

    Every immediate sub-directory of ``root_dir`` is treated as one class;
    class ids are assigned in alphabetical order of the folder names.

    Args:
        root_dir: Directory whose sub-directories are the class folders.

    Returns:
        A tuple ``(files, id_to_label)`` where ``files`` is a list of
        ``(filepath, label_id)`` pairs and ``id_to_label`` maps each integer
        id back to its folder name.

    Raises:
        OSError: If ``root_dir`` (or a class folder) cannot be listed.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")

    classes = sorted(
        d for d in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, d))
    )
    id_to_label = dict(enumerate(classes))

    files = []
    for label_id, class_name in id_to_label.items():
        folder = os.path.join(root_dir, class_name)
        # Sorted listing: directory order is filesystem-dependent, and a stable
        # file order is needed for runs to be reproducible with a fixed seed.
        for name in sorted(os.listdir(folder)):
            if name.startswith("."):
                continue  # hidden files were never matched by the "*.ext" globs
            # Case-insensitive suffix test so "IMG_01.JPG" is not silently
            # dropped on case-sensitive filesystems.
            if not name.lower().endswith(image_exts):
                continue
            path = os.path.join(folder, name)
            if os.path.isfile(path):  # skip directories that merely look like images
                files.append((path, label_id))
    return files, id_to_label

def load_image(path, img_size=256):
    """Read an image file and return it as a square float32 RGB array.

    Args:
        path: Location of the image file, handed to ``skimage.io.imread``.
        img_size: Side length in pixels of the square output.

    Returns:
        ``np.float32`` array of shape ``(img_size, img_size, 3)`` with values
        scaled to the range 0..1 (assumes the decoded image is either 2-D or
        channel-last).
    """
    img = io.imread(path)

    # Bring every supported layout to exactly three colour channels.
    if img.ndim == 2:  # grayscale -> RGB
        img = np.stack([img, img, img], axis=-1)
    elif img.shape[-1] in (1, 2):  # grayscale (optionally + alpha) with a channel axis
        img = np.repeat(img[..., :1], 3, axis=-1)
    elif img.shape[-1] == 4:  # RGBA -> RGB
        img = img[..., :3]

    # Pick the divisor from the pixel type instead of always using 255, so that
    # 16-bit TIFF/PNG files and float images also end up in 0..1.
    if img.dtype == np.bool_:
        scale = 1.0
    elif np.issubdtype(img.dtype, np.integer):
        scale = float(np.iinfo(img.dtype).max)
    elif img.size and float(img.max()) <= 1.0:
        scale = 1.0  # float image that is already normalised
    else:
        scale = 255.0  # float image assumed to use the 0..255 convention
    img = img.astype(np.float32) / scale

    # preserve_range=True keeps the 0..1 values exactly as scaled above.
    img = transform.resize(img, (img_size, img_size), anti_aliasing=True, preserve_range=True)
    return img.astype(np.float32)

def image_to_superpixel_graph(img, n_segments=120, compactness=10.0, knn_k=8):
    """Convert an RGB image into a superpixel graph.

    - Nodes: SLIC superpixels.
    - Node features (8 per node): mean Lab colour (L, a, b), normalised
      centroid position (x, y), and per-channel Lab standard deviation.
    - Edges: undirected k-nearest-neighbour links between superpixel centroids
      (Euclidean distance in pixel coordinates), without self-loops.

    Args:
        img: RGB image array; it is passed to ``slic`` with the channel axis
            last and to ``rgb2lab``.
        n_segments: Approximate number of superpixels requested from SLIC.
        compactness: SLIC compactness parameter.
        knn_k: Number of nearest neighbours each node is linked to.

    Returns:
        ``(X, A)``: ``X`` is the float32 feature matrix of shape
        ``(n_nodes, 8)`` and ``A`` the float32 symmetric 0/1 adjacency matrix
        of shape ``(n_nodes, n_nodes)``.
    """
    # Superpixel segmentation
    segments = slic(img, n_segments=n_segments, compactness=compactness, start_label=0, channel_axis=-1)
    n_nodes = int(segments.max()) + 1

    # Lab colour space is used for the colour statistics
    img_lab = color.rgb2lab(img)
    lab_channels = [img_lab[..., c] for c in range(3)]
    h, w = img.shape[:2]

    X = np.zeros((n_nodes, 8), dtype=np.float32)
    centroids = np.zeros((n_nodes, 2), dtype=np.float32)  # (y, x)

    for seg_id in range(n_nodes):
        mask = segments == seg_id
        if not np.any(mask):
            # A label without pixels keeps all-zero features and a (0, 0) centroid.
            continue
        per_channel = [channel[mask] for channel in lab_channels]
        means = [values.mean() for values in per_channel]
        stds = [values.std() for values in per_channel]  # 0.0 for a single pixel

        # Centroid position (y, x) in pixels; normalised to 0..1 for the features
        ys, xs = np.nonzero(mask)
        cy, cx = ys.mean(), xs.mean()

        X[seg_id] = [*means, cx / w, cy / h, *stds]
        centroids[seg_id] = [cy, cx]

    # k-NN edges on the centroids. The centroids are passed as the query on
    # purpose: each point is then returned among its own neighbours (and dropped
    # below), leaving knn_k real neighbours per node. Calling kneighbors() with
    # no query already excludes the point itself, which would give knn_k + 1
    # neighbours and raise an error whenever n_nodes <= knn_k + 1.
    k = min(knn_k + 1, n_nodes)
    nbrs = NearestNeighbors(n_neighbors=k, metric='euclidean').fit(centroids)
    indices = nbrs.kneighbors(centroids, return_distance=False)

    A = np.zeros((n_nodes, n_nodes), dtype=np.float32)
    src = np.repeat(np.arange(n_nodes), indices.shape[1])
    dst = indices.ravel()
    keep = src != dst  # no self-loops
    A[src[keep], dst[keep]] = 1.0
    A[dst[keep], src[keep]] = 1.0  # undirected

    # (Optional) standardise the features per graph so the scales are uniform:
    # X = (X - X.mean(0, keepdims=True)) / (X.std(0, keepdims=True) + 1e-6)
    return X, A

# Cek cepat
# test_img = np.ones((256,256,3), dtype=np.float32)
# X, A = image_to_superpixel_graph(test_img, n_segments=50)

## Dataset Kustom untuk Spektral

In [None]:
class CoffeeSuperpixelDataset(Dataset):
    """Spektral dataset holding one superpixel graph per image under ``root_dir``.

    The class folders found by ``list_files_labels`` define the labels; each
    graph carries a one-hot float32 label vector of length ``n_classes``.
    """

    def __init__(self, root_dir, img_size=IMG_SIZE, n_segments=N_SEGMENTS, compactness=COMPACTNESS, knn_k=KNN_K, **kwargs):
        # Preprocessing settings used by read()
        self.root_dir = root_dir
        self.img_size, self.n_segments = img_size, n_segments
        self.compactness, self.knn_k = compactness, knn_k

        # File listing and label map for this split
        listing = list_files_labels(root_dir)
        self.files_labels = listing[0]
        self.id_to_label = listing[1]
        self.n_classes = len(self.id_to_label)

        # The base initialiser runs last, after every attribute is in place
        # (presumably because it triggers read() - confirm against Spektral).
        super().__init__(**kwargs)

    def read(self):
        """Build and return the list of ``Graph`` objects, one per listed file."""

        def to_graph(image_path, class_id):
            pixels = load_image(image_path, self.img_size)
            features, adjacency = image_to_superpixel_graph(
                pixels, self.n_segments, self.compactness, self.knn_k
            )
            one_hot = tf.keras.utils.to_categorical(class_id, num_classes=self.n_classes)
            return Graph(x=features, a=adjacency, y=one_hot.astype(np.float32))

        return [to_graph(image_path, class_id) for image_path, class_id in self.files_labels]

## Bangun Dataset & DataLoader

In [None]:
# Build one dataset per split, in the order train -> validation -> test
train_ds, val_ds, test_ds = [
    CoffeeSuperpixelDataset(split_dir)
    for split_dir in (training_path, validation_path, testing_path)
]

# The training split defines the number of classes and the label names
N_CLASSES = train_ds.n_classes
print("Classes:", [train_ds.id_to_label[class_id] for class_id in range(N_CLASSES)])
print(f"Train graphs: {len(train_ds)}, Val: {len(val_ds)}, Test: {len(test_ds)}")

# Disjoint loaders batch graphs of varying size; only the training data is shuffled
train_loader = DisjointLoader(train_ds, shuffle=True, batch_size=BATCH_SIZE)
val_loader, test_loader = [
    DisjointLoader(split, shuffle=False, batch_size=BATCH_SIZE)
    for split in (val_ds, test_ds)
]

## Arsitektur GNN (GCN + Global Pooling)

In [None]:
# Simple GNN built from GCNConv layers.
# Inputs follow Spektral's 'disjoint' format [X, A, I], as produced by DisjointLoader.

class GNNModel(Model):
    """Two GCN layers followed by global sum pooling and a dense classifier.

    Args:
        n_classes: Number of output classes (size of the softmax layer).
        hidden: Number of channels in each GCNConv layer.
        dropout_rate: Dropout rate applied after each GCN block and after the
            dense hidden layer.
    """

    def __init__(self, n_classes, hidden=128, dropout_rate=0.2):
        super().__init__()
        self.gcn1 = GCNConv(hidden, activation='relu')
        self.bn1 = BatchNormalization()
        self.drop1 = Dropout(dropout_rate)

        self.gcn2 = GCNConv(hidden, activation='relu')
        self.bn2 = BatchNormalization()
        self.drop2 = Dropout(dropout_rate)

        self.pool = GlobalSumPool()
        self.out_dense1 = Dense(128, activation='relu')
        self.drop3 = Dropout(dropout_rate)
        self.out = Dense(n_classes, activation='softmax')

    def call(self, inputs, training=None):
        """Run the network on one disjoint batch.

        Args:
            inputs: Sequence ``[x, a, i]`` - node features, adjacency and the
                per-node graph index.
            training: Keras training flag. It is forwarded explicitly to the
                BatchNormalization and Dropout layers, which behave differently
                in training and inference; ``None`` lets Keras decide.

        Returns:
            Softmax class probabilities, one row per graph in the batch.
        """
        x, a, i = inputs

        x = self.gcn1([x, a])
        x = self.bn1(x, training=training)
        x = self.drop1(x, training=training)

        x = self.gcn2([x, a])
        x = self.bn2(x, training=training)
        x = self.drop2(x, training=training)

        # Sum the node embeddings of each graph into one vector per graph
        x = self.pool([x, i])
        x = self.out_dense1(x)
        x = self.drop3(x, training=training)
        return self.out(x)

model = GNNModel(N_CLASSES, hidden=128, dropout_rate=0.3)
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Create the weights (needed for summary()) by running one tiny disjoint-mode
# batch through the model instead of calling model.build(): in disjoint mode the
# node features are rank-2 (n_nodes, FEATURES_DIM), not rank-3, and the graph
# index must be an integer tensor, which build()'s float placeholders cannot
# provide.
model(
    [
        tf.zeros((1, FEATURES_DIM), dtype=tf.float32),  # x: a single node
        tf.zeros((1, 1), dtype=tf.float32),             # a: 1x1 adjacency, no edges
        tf.zeros((1,), dtype=tf.int64),                 # i: the node belongs to graph 0
    ],
    training=False,
)
model.summary()

## Training

In [None]:
# Both callbacks watch validation accuracy: the first multiplies the learning
# rate by 0.5 after 3 epochs of patience, the second stops after 6 epochs of
# patience and restores the best weights.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.5, patience=3, verbose=1
)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=6, restore_best_weights=True, verbose=1
)
callbacks = [lr_on_plateau, early_stopping]

# The loaders are consumed as generators, so the number of steps per epoch is
# passed explicitly for both training and validation.
fit_options = dict(
    steps_per_epoch=train_loader.steps_per_epoch,
    validation_data=val_loader.load(),
    validation_steps=val_loader.steps_per_epoch,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_loader.load(), **fit_options)

## Evaluasi

In [None]:
print("Evaluasi pada test set:")
# Evaluate over the test loader for the number of steps the loader reports
test_batches = test_loader.load()
test_results = model.evaluate(test_batches, steps=test_loader.steps_per_epoch, verbose=1)
# Pair each metric name with its value for a readable printout
named_results = dict(zip(model.metrics_names, test_results))
print(named_results)

## Inferensi Contoh & Visualisasi Superpixel (Opsional)

In [None]:
# Menampilkan prediksi dan visualisasi superpixel untuk beberapa contoh dari test set
import matplotlib.pyplot as plt

def visualize_superpixels(path):
    """Show the image at ``path`` with its SLIC superpixel boundaries drawn in white.

    The image is loaded and segmented with the notebook-level settings
    ``IMG_SIZE``, ``N_SEGMENTS`` and ``COMPACTNESS``.
    """
    image = load_image(path, IMG_SIZE)
    labels = slic(image, n_segments=N_SEGMENTS, compactness=COMPACTNESS, start_label=0, channel_axis=-1)

    _, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(image)
    # Contour lines of the label map trace the borders between superpixels
    ax.contour(labels, colors='white', linewidths=0.5)
    ax.axis('off')
    ax.set_title('Superpixel SLIC')
    plt.show()

# Pick up to 3 random files from the test set
test_files = [fp for fp, _ in test_ds.files_labels]
random.shuffle(test_files)
for fp in test_files[:3]:
    img = load_image(fp, IMG_SIZE)
    X, A = image_to_superpixel_graph(img, N_SEGMENTS, COMPACTNESS, KNN_K)
    # Graph indicator 'i' for a batch of a single graph: every node belongs to graph 0
    I = np.zeros((X.shape[0],), dtype=np.int64)
    # The model works in disjoint mode, so x, a and i are passed WITHOUT an extra
    # leading batch axis: x is (n_nodes, F), a is (n_nodes, n_nodes), i is (n_nodes,).
    # The model is called directly rather than through predict(), which would
    # treat the node axis as a sample axis to be split into mini-batches.
    probs = model(
        [tf.convert_to_tensor(X), tf.convert_to_tensor(A), tf.convert_to_tensor(I)],
        training=False,
    )
    pred = probs.numpy()[0]  # one row per graph -> class probabilities of the only graph
    pred_id = int(pred.argmax())
    label_name = test_ds.id_to_label[pred_id]
    print(f"Prediksi: {label_name} (p={pred[pred_id]:.3f}) — file: {os.path.basename(fp)}")
    visualize_superpixels(fp)

## Catatan & Tips
- **N_SEGMENTS** (jumlah superpixel) trade-off antara detail vs waktu komputasi. Coba 80–200.
- **KNN_K** mengontrol kepadatan edge. Nilai 6–10 umumnya stabil.
- Tambahkan **augmentasi** di level gambar sebelum SLIC untuk memperkaya variasi (flip/rotate).
- Coba layer lain: `GraphSageConv`, `GATConv` (Spektral) untuk eksplorasi arsitektur.
- Anda bisa menyimpan **label map** via `train_ds.id_to_label` untuk digunakan saat deployment.
