#  Tugas CBIR: Ekstraksi Fitur Warna, Tekstur, dan Bentuk

- **Nama:** 
- **NIM:** 
- **Kelas:** PCD VisCom A

Tujuan dari notebook ini adalah membangun sistem CBIR sederhana untuk mencari gambar berdasarkan tiga fitur:
1.  **Warna:** Histogram HSV & Statistik (Mean, Std, Skewness, Kurtosis)
2.  **Tekstur:** GLCM (rata-rata dari 4 arah untuk Contrast, Correlation, Energy, Homogeneity)
3.  **Bentuk:** Area, Perimeter, Aspect Ratio, Extent, Solidity, Circularity (dari threshold Otsu)

In [None]:
# Sel 1: Import Semua Library yang Dibutuhkan

import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew, kurtosis
from scipy.spatial.distance import euclidean, cosine
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")

print("Semua library berhasil di-import.")

## Langkah 1: Fungsi Ekstraksi Fitur

Kita akan membuat tiga fungsi utama sesuai permintaan tugas, ditambah satu fungsi *helper* untuk menggabungkan semuanya.

In [None]:
# Sel 2: Fungsi Ekstraksi Fitur

def extract_color_features(image_path):
    """Extract HSV color features from the image at ``image_path``.

    The returned vector has 44 values:

    * 32 histogram values: 16 hue bins, 8 saturation bins and 8 value bins.
      Each histogram is normalized on its own with ``cv2.normalize``
      (default settings).
    * 12 statistics: mean, standard deviation, skewness and kurtosis for
      each of the H, S and V channels, in that order.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A 1-D NumPy array of color features, or ``None`` when the image
        cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # 1. Histograms: (channel index, number of bins, upper end of the range).
    #    Hue is binned over [0, 180], saturation and value over [0, 256].
    histogram_specs = [(0, 16, 180), (1, 8, 256), (2, 8, 256)]
    histogram_parts = []
    for channel_index, bins, upper in histogram_specs:
        hist = cv2.calcHist([hsv], [channel_index], None, [bins], [0, upper])
        cv2.normalize(hist, hist)  # in-place normalization
        histogram_parts.append(hist.flatten())
    hist_features = np.concatenate(histogram_parts)

    # 2. Per-channel statistics.
    stats_features = []
    for channel in cv2.split(hsv):
        values = channel.flatten()
        stats_features.extend(
            [np.mean(values), np.std(values), skew(values), kurtosis(values)]
        )

    # skew/kurtosis can be NaN (e.g. for a channel whose pixels are all
    # identical). A NaN here would survive scaling and turn every distance
    # involving this image into NaN, so replace it with 0.
    stats_features = np.nan_to_num(
        np.asarray(stats_features, dtype=np.float64), nan=0.0
    )

    # Histogram features first, statistics second.
    return np.concatenate([hist_features, stats_features])


def extract_texture_features(image_path):
    """Extract GLCM texture features from the image at ``image_path``.

    A gray-level co-occurrence matrix is built at distance 1 for the four
    directions 0, 45, 90 and 135 degrees (symmetric and normalized). Each
    property is then averaged over those directions.

    Returns:
        A 1-D NumPy array ``[contrast, correlation, energy, homogeneity]``,
        or ``None`` when the image cannot be read.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        return None

    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # GLCM parameters: one pixel offset, four directions (in radians).
    pixel_offsets = [1]
    directions = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    cooccurrence = graycomatrix(
        grayscale,
        distances=pixel_offsets,
        angles=directions,
        symmetric=True,
        normed=True,
    )

    # One averaged value per property, in the documented order.
    property_names = ('contrast', 'correlation', 'energy', 'homogeneity')
    averaged = [np.mean(graycoprops(cooccurrence, name)) for name in property_names]

    return np.array(averaged)


def extract_shape_features(image_path):
    """Extract shape features of the largest contour in an image.

    The image is binarized with an inverted Otsu threshold, external
    contours are extracted, and the contour with the largest area is
    treated as the main object.

    Returns:
        A 1-D NumPy array
        ``[area, perimeter, aspect_ratio, extent, solidity, circularity]``;
        an all-zero array of length 6 when no contour is found; ``None``
        when the image cannot be read.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        return None

    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # 1. Otsu thresholding (inverted binary output).
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # 2. External contours only; the largest one is assumed to be the object.
    found, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not found:
        # No contour at all: fall back to a zero feature vector.
        return np.zeros(6)

    largest = max(found, key=cv2.contourArea)

    # 3. Basic measurements.
    area = cv2.contourArea(largest)
    perimeter = cv2.arcLength(largest, True)

    # Bounding box drives aspect ratio (w / h) and extent (area / (w * h)).
    _, _, box_w, box_h = cv2.boundingRect(largest)
    box_area = box_w * box_h

    # Every ratio falls back to 0 when its denominator is not positive.
    aspect_ratio = 0
    if box_h > 0:
        aspect_ratio = box_w / float(box_h)

    extent = 0
    if box_area > 0:
        extent = area / float(box_area)

    # Solidity: contour area over convex hull area.
    hull_area = cv2.contourArea(cv2.convexHull(largest))
    solidity = 0
    if hull_area > 0:
        solidity = area / float(hull_area)

    # Circularity (roundness): 4 * pi * A / P^2.
    circularity = 0
    if perimeter > 0:
        circularity = (4 * np.pi * area) / (perimeter**2)

    return np.array([area, perimeter, aspect_ratio, extent, solidity, circularity])


def get_all_features(image_path):
    """Run all three extractors on one image and bundle the results.

    Returns:
        ``None`` if any extractor returned ``None``; otherwise a dict with
        the keys ``path``, ``label``, ``color``, ``texture``, ``shape`` and
        ``combined`` (color + texture + shape concatenated, in that order).
        ``label`` is the name of the folder that directly contains the
        image, e.g. ``"db/apple/apple_01.png"`` -> ``"apple"``.
    """
    groups = {
        "color": extract_color_features(image_path),
        "texture": extract_texture_features(image_path),
        "shape": extract_shape_features(image_path),
    }

    # A single failed extractor invalidates the whole record.
    if any(vector is None for vector in groups.values()):
        return None

    parent_folder = os.path.dirname(image_path)
    record = {
        "path": image_path,
        "label": os.path.basename(parent_folder),
    }
    record.update(groups)
    record["combined"] = np.concatenate(
        [groups["color"], groups["texture"], groups["shape"]]
    )
    return record

print("Semua fungsi ekstraksi fitur telah didefinisikan.")

## Langkah 2: Indeksasi Database
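Sekarang, kita akan memproses semua gambar di folder `db/` dan menyimpan fiturnya ke dalam file `features.csv` (catatan: meskipun berekstensi `.csv`, file ini ditulis dalam format pickle agar array NumPy tetap utuh). Ini mempercepat proses pencarian nanti, karena kita tidak perlu mengekstrak ulang fitur database setiap kali mencari.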

In [None]:
# Cell 3: Index the database (db/)

DB_PATH = "db/"
# NOTE: despite the .csv extension, this file is written with to_pickle below
# so that the NumPy arrays stored in the columns survive the round trip.
FEATURES_FILE = "features.csv"

# Collect every .png, .jpg and .jpeg file below DB_PATH (recursive).
image_paths = []
for pattern in ("*.png", "*.jpg", "*.jpeg"):
    image_paths.extend(
        glob.glob(os.path.join(DB_PATH, "**", pattern), recursive=True)
    )

print(f"Ditemukan {len(image_paths)} gambar di folder {DB_PATH}")

all_features_list = []

# Extract features image by image; unreadable images are reported and skipped.
for path in image_paths:
    features = get_all_features(path)
    if features is not None:
        all_features_list.append(features)
    else:
        print(f"Gagal memproses: {path}")

# Without at least one record the DataFrame has no feature columns and the
# scaling below would fail with an unhelpful KeyError, so stop early instead.
if not all_features_list:
    raise RuntimeError(
        f"Tidak ada gambar yang berhasil diproses di folder {DB_PATH}"
    )

# One row per image: path, label and the raw feature vectors.
features_df = pd.DataFrame(all_features_list)

# --- FEATURE NORMALIZATION ---
# Min-max scaling keeps large-valued features (e.g. area ~ 10000) from
# dominating small-valued ones (e.g. homogeneity ~ 0.8), which matters most
# for the combined configuration (Config C).

scaler = MinMaxScaler()

# Stack each feature group into an (n_images, n_features) matrix.
color_data = np.vstack(features_df['color'].values)
texture_data = np.vstack(features_df['texture'].values)
shape_data = np.vstack(features_df['shape'].values)

# Scale each group; fit_transform refits the scaler on every call.
scaled_color = scaler.fit_transform(color_data)
scaled_texture = scaler.fit_transform(texture_data)
scaled_shape = scaler.fit_transform(shape_data)

# Re-assemble the combined vector from the scaled groups.
scaled_combined = np.hstack([scaled_color, scaled_texture, scaled_shape])

# Store the scaled vectors, one array per row.
features_df['scaled_color'] = list(scaled_color)
features_df['scaled_texture'] = list(scaled_texture)
features_df['scaled_shape'] = list(scaled_shape)
features_df['scaled_combined'] = list(scaled_combined)

# Drop the raw feature columns from the saved index (they remain available
# in all_features_list).
features_df = features_df.drop(columns=['color', 'texture', 'shape', 'combined'])

# Persist the index as a pickle so the array-valued columns keep their type.
features_df.to_pickle(FEATURES_FILE)

print(f"Indeksasi selesai. Fitur (sudah dinormalisasi) disimpan ke {FEATURES_FILE}")
features_df.head()

## Langkah 3: Fungsi Pencarian (Search)
Fungsi ini akan mengambil 1 gambar *query*, mengekstrak fiturnya, membandingkannya dengan semua fitur di database, dan menampilkan Top-5 gambar yang paling mirip.

In [None]:
# Sel 4: Fungsi Pencarian dan Penampilan Hasil

# Load the saved feature index. read_pickle should only be used on files
# you produced yourself.
try:
    features_df = pd.read_pickle(FEATURES_FILE)
except FileNotFoundError:
    print("File 'features.csv' tidak ditemukan. Jalankan Sel 3 terlebih dahulu.")
else:
    print("Database fitur berhasil dimuat.")

# Query vectors must be scaled with the same min/max as the database vectors.
# The fitted scalers are not persisted, so they are rebuilt here from the raw
# features that are still held in memory (simplification: saving the scalers
# alongside the index would be the more robust approach).
def get_scalers():
    """Fit one MinMaxScaler per feature group on the raw database features.

    Reads the module-level ``all_features_list`` created by the indexing
    cell, so that cell must have been run in the current session.

    Returns:
        A tuple ``(color_scaler, texture_scaler, shape_scaler,
        combined_scaler)`` of fitted scalers.
    """
    raw_df = pd.DataFrame(all_features_list)

    # Same order as the returned tuple; 'combined' is the raw concatenation.
    group_names = ('color', 'texture', 'shape', 'combined')
    fitted = tuple(
        MinMaxScaler().fit(np.vstack(raw_df[name].values))
        for name in group_names
    )
    return fitted

# Fit once and expose the scalers at module level for the search function.
color_scaler, texture_scaler, shape_scaler, combined_scaler = get_scalers()


def search_and_display(query_path, db_df, config='C', k=5):
    """Retrieve and plot the ``k`` database images closest to a query image.

    The query's raw features are scaled with the module-level
    ``color_scaler`` / ``texture_scaler`` / ``combined_scaler`` and compared
    with the pre-scaled database vectors using Euclidean distance.

    Args:
        query_path: Path of the query image. The name of its parent folder
            is used as the query's class label.
        db_df: Feature index with the columns ``path``, ``label``,
            ``scaled_color``, ``scaled_texture`` and ``scaled_combined``.
        config: ``'A'`` for color, ``'B'`` for texture (GLCM); any other
            value selects the combined features (Config C).
        k: Number of results to retrieve and display.

    Returns:
        ``(result_labels, query_label)`` where ``result_labels`` holds the
        labels of the retrieved images in rank order. ``([], "")`` is
        returned when the query image cannot be processed, and
        ``([], query_label)`` when the database index is empty.
    """
    # 1. Raw features of the query image.
    query_features_raw = get_all_features(query_path)
    if not query_features_raw:
        print(f"Gagal memproses query: {query_path}")
        return [], ""

    query_label = query_features_raw['label']

    # Nothing to compare against: np.vstack below would raise on an empty list.
    if len(db_df) == 0:
        print("Database fitur kosong, tidak ada yang bisa dibandingkan.")
        return [], query_label

    # 2. Pick the query vector and the matching database column.
    if config == 'A':
        query_vec = color_scaler.transform([query_features_raw['color']])[0]
        db_vecs = np.vstack(db_df['scaled_color'].values)
        feature_name = "Warna (Config A)"
    elif config == 'B':
        query_vec = texture_scaler.transform([query_features_raw['texture']])[0]
        db_vecs = np.vstack(db_df['scaled_texture'].values)
        feature_name = "Tekstur (Config B)"
    else:  # config == 'C'
        # 'combined' is already the raw color + texture + shape concatenation.
        query_vec = combined_scaler.transform([query_features_raw['combined']])[0]
        db_vecs = np.vstack(db_df['scaled_combined'].values)
        feature_name = "Gabungan (Config C)"

    # 3-5. Euclidean distance to every database vector, smallest first,
    # then keep the top k. zip avoids a per-row DataFrame lookup.
    ranked = sorted(
        (
            (path, label, euclidean(query_vec, db_vec))
            for path, label, db_vec in zip(db_df['path'], db_df['label'], db_vecs)
        ),
        key=lambda item: item[2],
    )
    top_k_results = ranked[:k]

    # 6. Plot the query followed by the results.
    # squeeze=False keeps `axes` indexable even when k == 0.
    _fig, axes_grid = plt.subplots(1, k + 1, figsize=(20, 5), squeeze=False)
    axes = axes_grid[0]
    # Hide every frame up front so panels without a result stay blank.
    for ax in axes:
        ax.axis('off')

    query_img = cv2.imread(query_path)
    query_img = cv2.cvtColor(query_img, cv2.COLOR_BGR2RGB)
    axes[0].imshow(query_img)
    axes[0].set_title(f"Query: {os.path.basename(query_path)}\nConfig: {feature_name}")

    result_labels = []
    for i, (path, label, dist) in enumerate(top_k_results):
        result_img = cv2.imread(path)
        # The indexed file may have become unreadable since indexing; keep
        # the title and label but skip drawing instead of crashing.
        if result_img is not None:
            axes[i+1].imshow(cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB))

        # Green title when the result shares the query's label, red otherwise.
        color = 'green' if label == query_label else 'red'
        axes[i+1].set_title(f"Rank {i+1}: {os.path.basename(path)}\nLabel: {label}\nDist: {dist:.4f}", color=color)

        result_labels.append(label)

    plt.tight_layout()
    plt.show()

    return result_labels, query_label

print("Fungsi pencarian siap.")

## Langkah 4: Evaluasi (Precision@5)
Kita akan membuat fungsi untuk menghitung Precision@5, lalu menjalankan semua gambar *query* untuk ketiga konfigurasi (A, B, C) dan membuat tabel hasilnya.

In [None]:
# Sel 5: Fungsi Evaluasi dan Eksekusi Utama

def calculate_precision_at_k(query_label, result_labels, k=5):
    """Compute Precision@k for one query.

    Args:
        query_label: Class label of the query image.
        result_labels: Labels of the retrieved images, in rank order.
        k: Cut-off rank; must be a positive integer.

    Returns:
        The number of the first ``k`` results whose label equals
        ``query_label``, divided by ``k``. Returns 0 when ``result_labels``
        is empty. The denominator is always ``k``, even when fewer than
        ``k`` results were retrieved.

    Raises:
        ValueError: If results are present and ``k`` is not positive.
    """
    if not result_labels:
        return 0.0

    # k == 0 would divide by zero, and a negative k would slice from the end
    # of the list and produce a negative "precision".
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    relevant_count = sum(1 for label in result_labels[:k] if label == query_label)
    return relevant_count / k

print("Fungsi P@5 siap.")

# --- MAIN EXECUTION ---

QUERY_PATH = "query/"
# Collect every .png, .jpg and .jpeg file below QUERY_PATH (recursive).
query_image_paths = []
for pattern in ("*.png", "*.jpg", "*.jpeg"):
    query_image_paths.extend(
        glob.glob(os.path.join(QUERY_PATH, "**", pattern), recursive=True)
    )

print(f"Ditemukan {len(query_image_paths)} gambar query.")

# One entry per (query, configuration) pair.
evaluation_results = []

for q_path in query_image_paths:
    print(f"--- Memproses Query: {os.path.basename(q_path)} ---")

    for config in ['A', 'B', 'C']:

        # Retrieve and plot the top-5 results for this configuration.
        result_labels, query_label = search_and_display(q_path, features_df, config=config, k=5)

        # Precision@5 for this query/configuration pair.
        p_at_5 = calculate_precision_at_k(query_label, result_labels, k=5)

        evaluation_results.append({
            "query": os.path.basename(q_path),
            "config": config,
            "query_label": query_label,
            "p@5": p_at_5
        })

        print(f"Config {config} | Query: {query_label} | P@5: {p_at_5}")

eval_df = pd.DataFrame(evaluation_results)

if eval_df.empty:
    # Without any rows the frame has no 'config' column and the groupby
    # below would raise a KeyError, so report the situation instead.
    print("Tidak ada hasil evaluasi: tidak ada gambar query yang ditemukan.")
else:
    print("\n--- Hasil Evaluasi P@5 per Query ---")
    print(eval_df.to_string())

    # Mean of P@5 over all queries, per configuration (reported under the
    # column name 'MAP@5').
    map_df = eval_df.groupby('config')['p@5'].mean().reset_index()
    map_df = map_df.rename(columns={'p@5': 'MAP@5'})

    print("\n--- Hasil Akhir (Tabel Precision@5 untuk Laporan) ---")
    # Map the configuration codes to the row names used in the report table.
    map_df['Konfigurasi'] = map_df['config'].map({
        'A': 'A. Warna',
        'B': 'B. GLCM',
        'C': 'C. Gabungan (Warna+GLCM+Bentuk)'
    })
    print(map_df[['Konfigurasi', 'MAP@5']].to_string(index=False))