## Import Libraries

In [None]:
import cv2
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import measure
from scipy import stats
from skimage.feature import graycomatrix, graycoprops
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from pathlib import Path
import warnings

warnings.filterwarnings('ignore')


## Fungsi untuk menampilkan gambar dari folder

In [None]:
def show_images_from_folder(folder_path, title="Sample Images"):
    """Display up to the first three PNG images of a folder side by side.

    Files are selected by a ``.png`` suffix and shown in sorted-path order as
    grayscale images in a 1x3 matplotlib figure.

    Args:
        folder_path: Directory to scan for ``.png`` files.
        title: Super-title drawn above the figure.

    Returns:
        None. When the folder holds no PNG file a message is printed and no
        figure is created.
    """
    images = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.endswith('.png')
    )
    if not images:
        print(f"Tidak ada gambar di folder {folder_path}")
        return

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    axes = axes.flatten()

    # Switch every axis off up front so unused slots simply stay blank.
    for ax in axes:
        ax.axis('off')

    # zip() stops at the shorter sequence, so a folder with only one or two
    # images no longer raises IndexError.
    for ax, image_file in zip(axes, images):
        name = os.path.basename(image_file)
        img = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread yields None for files it cannot decode; imshow(None)
            # would raise, so only label the empty slot.
            ax.set_title(f"{name} (unreadable)")
            continue
        ax.imshow(img, cmap='gray')
        ax.set_title(name)

    plt.suptitle(title, fontweight='bold')
    plt.tight_layout()
    plt.show()

## Membuat folder untuk menyimpan gambar

In [None]:
# Working directories for intermediate and final images.
filter_folder = "filtered_images"
clahe_folder = "filtered_clahe"
output_folder = "output_images"

# Create whichever of the directories does not exist yet.
for folder in (filter_folder, clahe_folder, output_folder):
    if os.path.exists(folder):
        continue
    os.makedirs(folder)

## Preprocessing: Gaussian filter dan CLAHE

In [None]:
def preprocessing(img, kernel_size=(5,5)):
    """Smooth an image with a Gaussian blur, then enhance contrast with CLAHE.

    Args:
        img: Image array as loaded by OpenCV, or None.
        kernel_size: Gaussian kernel size passed to ``cv2.GaussianBlur``.

    Returns:
        The contrast-enhanced image, or None when ``img`` is None.
    """
    if img is not None:
        # Blur with a fixed sigma of 0.5 before equalisation.
        blurred = cv2.GaussianBlur(img, kernel_size, 0.5)

        # CLAHE with clip limit 2.0 on an 8x8 tile grid.
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return equalizer.apply(blurred)

    return None

## Segmentation

### Otsu thresholding (sensitive segmentation)

In [None]:
def otsu_threshold(image):
    """Binarise an image with Otsu thresholding.

    Args:
        image: Image array accepted by ``cv2.threshold``, or None.

    Returns:
        The thresholded image (values 0 or 255), or None when ``image`` is
        None.
    """
    if image is None:
        return None

    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    return cv2.threshold(image, 0, 255, mode)[1]

### Global thresholding (insensitive segmentation)

In [None]:
def global_threshold(image, threshold_value=127):
    """Binarise an image with one fixed, global threshold.

    Args:
        image: Image array accepted by ``cv2.threshold``, or None.
        threshold_value: Threshold handed to ``cv2.threshold``.

    Returns:
        The thresholded image (values 0 or 255), or None when ``image`` is
        None.
    """
    if image is not None:
        outcome = cv2.threshold(image, threshold_value, 255, cv2.THRESH_BINARY)
        # outcome is (threshold_used, image); return the image part.
        return outcome[1]

    return None

### Morphological processing (closing)

In [None]:
def closing(binary_mask, kernel_size=(5, 5)):
    """Apply a morphological closing with a rectangular structuring element.

    Args:
        binary_mask: Mask image to close, or None.
        kernel_size: Size of the rectangular structuring element.

    Returns:
        The closed mask, or None when ``binary_mask`` is None.
    """
    if binary_mask is None:
        return None

    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    return cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, structuring_element)

## Feature Extraction

### Fitur bentuk (shape features)

In [None]:
def _empty_shape_features():
    """Return all 17 shape-feature keys set to 0, in their canonical order."""
    features = {'height_left': 0, 'height_right': 0, 'height_diff': 0}
    for i in range(5):
        features[f'ratio_left_{i}'] = 0
        features[f'ratio_right_{i}'] = 0
    features['perimeter_left'] = 0
    features['perimeter_right'] = 0
    features['eccentricity_left'] = 0
    features['eccentricity_right'] = 0
    return features


def extract_shape_features(mask):
    """Extract shape descriptors of the two largest connected regions of a mask.

    The regions are ranked by area; the largest one fills the ``*_left`` keys
    and the second largest the ``*_right`` keys.

    The returned dict always carries the same 17 keys (``height_*``,
    ``ratio_*_0..4``, ``perimeter_*``, ``eccentricity_*``), whatever the
    number of regions found, so every image yields the same feature columns.
    Values that cannot be measured stay 0.

    Args:
        mask: Segmentation mask (pixels > 0 are foreground), or None.

    Returns:
        dict mapping feature name to value.
    """
    # Start from the full zero-filled schema. The previous fallbacks used
    # unrelated 'shape_<i>' keys, and the single-region case omitted
    # 'ratio_right_0'; both produced NaN columns once rows were stacked.
    features = _empty_shape_features()
    if mask is None:
        return features

    # Label connected components and keep the two with the largest area.
    labelled_mask = measure.label(mask > 0)
    regions = sorted(
        measure.regionprops(labelled_mask),
        key=lambda region: region.area,
        reverse=True,
    )[:2]
    if not regions:
        return features

    # NOTE(review): "left"/"right" reflect area rank, not image position --
    # confirm whether an ordering by centroid column was intended.
    for side, region in zip(('left', 'right'), regions):
        # bbox is (min_row, min_col, max_row, max_col) -> height in rows.
        height = region.bbox[2] - region.bbox[0]
        ratio = region.major_axis_length / (height + 1e-5) if height > 0 else 0

        features[f'height_{side}'] = height
        # The same ratio is stored under five keys to keep the feature
        # layout that existing consumers of this dict expect.
        for i in range(5):
            features[f'ratio_{side}_{i}'] = ratio
        features[f'perimeter_{side}'] = region.perimeter
        features[f'eccentricity_{side}'] = region.eccentricity

    # With a single region height_right is still 0, so this equals its height.
    features['height_diff'] = abs(features['height_left'] - features['height_right'])
    return features

### First-order Statistical Features (FOFS)

In [None]:
def fofs(image, mask):
    """Compute first-order statistical features of the pixels inside a mask.

    Args:
        image: Grayscale image; intensities are histogrammed over [0, 256).
        mask: Array of the same shape; pixels > 0 select the region.

    Returns:
        dict with keys 'mean', 'variance', 'std_dev', 'skewness', 'kurtosis',
        'entropy', 'smoothness' and 'uniformity'. All values are 0 when an
        input is None, the mask selects nothing, or every selected pixel is 0.
    """
    keys = ('mean', 'variance', 'std_dev', 'skewness', 'kurtosis',
            'entropy', 'smoothness', 'uniformity')
    zero_features = {k: 0 for k in keys}

    if image is None or mask is None:
        return zero_features

    # Direct boolean indexing selects exactly the pixels the previous
    # bitwise_and + indexing round-trip produced.
    pixels = np.asarray(image)[np.asarray(mask) > 0]
    if pixels.size == 0 or not pixels.any():
        return zero_features

    pixels = pixels.astype(np.float64)
    mean = np.mean(pixels)
    variance = np.var(pixels)
    std_dev = np.std(pixels)

    # Skewness/kurtosis are undefined (NaN) for a constant region; report 0
    # so the feature vector stays finite for the classifiers.
    if variance > 0:
        skewness = stats.skew(pixels)
        kurtosis = stats.kurtosis(pixels)
    else:
        skewness = 0.0
        kurtosis = 0.0

    # One bin per gray level. The previous 255 bins over a 256-wide range
    # merged levels 0 and 1 and, with density=True, scaled every
    # "probability" by 255/256.
    counts, _ = np.histogram(pixels, bins=256, range=(0, 256))
    total = counts.sum()
    if total > 0:
        probs = counts[counts > 0] / total
        entropy = -np.sum(probs * np.log2(probs))
        uniformity = np.sum(probs ** 2)
    else:
        # Every selected pixel lies outside [0, 256).
        entropy = 0.0
        uniformity = 0.0
    smoothness = 1 - (1 / (1 + variance))

    return {
        'mean': mean,
        'variance': variance,
        'std_dev': std_dev,
        'skewness': skewness,
        'kurtosis': kurtosis,
        'entropy': entropy,
        'smoothness': smoothness,
        'uniformity': uniformity,
    }

### Gray-Level Co-occurrence Matrix (GLCM)

In [None]:
def glcm(image, mask):
    """Compute GLCM texture features of the masked image in four directions.

    Six properties (contrast, dissimilarity, homogeneity, energy,
    correlation, ASM) are evaluated at distance 1 for the angles 0, 45, 90
    and 135 degrees, giving 24 features named ``glcm_<property>_<angle>``.

    Args:
        image: Grayscale image, or None.
        mask: Mask passed to ``cv2.bitwise_and``, or None.

    Returns:
        dict of the 24 features. Every value is 0 when an input is None or
        when the masked image has no usable variation in gray level.
    """
    prop_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
    angle_tags = ('0', '45', '90', '135')

    def zero_features():
        # Fallback with the same keys and order as the real result.
        return {f'glcm_{prop}_{angle}': 0 for prop in prop_names for angle in angle_tags}

    if image is None or mask is None:
        return zero_features()

    masked = cv2.bitwise_and(image, image, mask=mask)

    # Nothing positive survives the mask -> no texture to describe.
    positive = masked[masked > 0]
    if positive.size == 0:
        return zero_features()

    # All surviving pixels share one value -> degenerate texture.
    if positive.min() == positive.max():
        return zero_features()

    # Stretch to the full 8-bit range expected by graycomatrix(levels=256).
    img_8bit = cv2.normalize(masked, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    if img_8bit.size == 0 or len(np.unique(img_8bit)) < 2:
        return zero_features()

    # NOTE(review): the zero-valued pixels outside the mask are part of
    # img_8bit and therefore contribute to the co-occurrence counts.
    cooccurrence = graycomatrix(
        img_8bit,
        distances=[1],
        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
        levels=256,
        symmetric=True,
        normed=True,
    )

    features = {}
    for prop in prop_names:
        # Result has shape (n_distances, n_angles); row 0 is distance 1.
        per_angle = graycoprops(cooccurrence, prop)
        for column, angle in enumerate(angle_tags):
            features[f'glcm_{prop}_{angle}'] = per_angle[0, column]

    return features

## Pipeline

In [None]:
def process_single_image(image_path):
    """Run the full pipeline on one image file and return its feature dict.

    Steps: load as grayscale, preprocess (blur + CLAHE), segment twice
    (Otsu and fixed global threshold, each followed by closing), then extract
    shape features from the Otsu mask and FOFS/GLCM features from the
    global-threshold mask.

    Args:
        image_path: Path to the image, as ``str`` or any ``os.PathLike``
            (e.g. ``pathlib.Path``) that resolves to a ``str``.

    Returns:
        dict merging shape, FOFS and GLCM features, or None (after printing a
        message) when the path is invalid or missing, the file cannot be
        read, or a processing step yields None.
    """
    # Accept pathlib.Path as well as str; anything else (ints, bytes, None)
    # is rejected exactly as before.
    try:
        path = os.fspath(image_path)
    except TypeError:
        path = None
    if not isinstance(path, str):
        print("Invalid path:", image_path)
        return None

    if not os.path.exists(path):
        print("Path not found:", path)
        return None

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("OpenCV cannot read:", path)
        return None

    # 1. Preprocessing
    enhanced = preprocessing(img)
    if enhanced is None:
        print(f"Preprocessing failed for {path}")
        return None

    # 2. Segmentation (sensitive): Otsu threshold + closing
    otsu_mask = otsu_threshold(enhanced)
    if otsu_mask is None:
        print(f"Otsu thresholding failed for {path}")
        return None
    closed_otsu = closing(otsu_mask)
    if closed_otsu is None:
        print(f"Closing (Otsu) failed for {path}")
        return None

    # 3. Segmentation (insensitive): global threshold + closing
    global_mask = global_threshold(enhanced)
    if global_mask is None:
        print(f"Global thresholding failed for {path}")
        return None
    closed_global = closing(global_mask)
    if closed_global is None:
        print(f"Closing (Global) failed for {path}")
        return None

    # 4. Feature extraction: shape from the Otsu mask, intensity/texture
    # statistics from the global-threshold mask.
    shape_features = extract_shape_features(closed_otsu)
    fofs_features = fofs(enhanced, closed_global)
    glcm_features = glcm(enhanced, closed_global)

    return {**shape_features, **fofs_features, **glcm_features}

In [None]:
def extract_dataset(df, limit=None):
    """Extract features for the images listed in a DataFrame.

    Args:
        df: DataFrame with a 'path' column (image file) and a 'label' column.
        limit: Maximum number of rows to process, counted from the top of
            ``df``. None processes every row; 0 or a negative value
            processes none.

    Returns:
        DataFrame with one row per successfully processed image: the feature
        columns followed by 'label'. Images for which extraction returned
        None are skipped.
    """
    # Slice by position. The index labels of df are not positions: after a
    # shuffle, filter or split they are arbitrary, so comparing them to
    # `limit` stopped at the wrong row. `is None` also keeps limit=0 from
    # silently meaning "no limit".
    rows = df if limit is None else df.iloc[:max(limit, 0)]
    total = len(rows)

    features_list = []
    labels = []

    for position, (_, row) in enumerate(rows.iterrows()):
        if position % 50 == 0:
            print(f"Processing image {position+1}/{total}...")

        features = process_single_image(row['path'])
        if features is not None:
            features_list.append(features)
            labels.append(row['label'])

    # Stack the per-image dicts into a table and attach the labels.
    features_df = pd.DataFrame(features_list)
    features_df['label'] = labels

    print("\nFeature extraction selesai!")
    print(f"Total fitur yang diekstrak: {len(features_df.columns)-1}")

    return features_df

### Klasifikasi hierarkis

In [None]:
class klasifikasiHierarkis:
    """Two-stage (hierarchical) classifier.

    Stage 1 classifies on the shape features only. Every sample that stage 1
    labels 0 (normal) is re-classified by stage 2, which uses all features;
    stage 2's answer is final for those samples.
    """

    # A column counts as a shape feature when its name contains one of these.
    _SHAPE_KEYWORDS = ('height', 'ratio', 'perimeter', 'eccentricity')

    def __init__(self, model_type='svm'):
        """Create an untrained classifier.

        Args:
            model_type: 'svm' for RBF-kernel SVC models; any other value
                selects Gaussian naive Bayes.
        """
        self.model_type = model_type
        self.stage1_model = None  # trained on shape features
        self.stage2_model = None  # trained on all features
        self.scaler1 = StandardScaler()
        self.scaler2 = StandardScaler()

    @classmethod
    def _shape_columns(cls, X):
        """Return the columns of X whose name marks them as shape features."""
        return [
            col for col in X.columns
            if any(keyword in col for keyword in cls._SHAPE_KEYWORDS)
        ]

    def fit(self, X, y):
        """Train both stages.

        Args:
            X: DataFrame holding all feature columns.
            y: Labels.

        Returns:
            self
        """
        shape_cols = self._shape_columns(X)

        # Each stage gets its own scaler fitted on its own feature subset.
        X_shape_scaled = self.scaler1.fit_transform(X[shape_cols])
        X_all_scaled = self.scaler2.fit_transform(X)

        if self.model_type == 'svm':
            self.stage1_model = SVC(kernel='rbf', probability=True, random_state=42)
            self.stage2_model = SVC(kernel='rbf', probability=True, random_state=42)
        else:
            self.stage1_model = GaussianNB()
            self.stage2_model = GaussianNB()

        self.stage1_model.fit(X_shape_scaled, y)
        self.stage2_model.fit(X_all_scaled, y)

        print(f"Stage 1 ({self.model_type}) trained on {len(shape_cols)} fitur-fitur bentuk (shape features)")
        print(f"Stage 2 ({self.model_type}) trained on {X.shape[1]} semua fitur")

        return self

    def predict(self, X):
        """Predict labels hierarchically.

        Args:
            X: DataFrame with the same feature columns used in ``fit``.

        Returns:
            Array of labels: stage 1's prediction where it is non-zero,
            otherwise stage 2's prediction.
        """
        X_shape_scaled = self.scaler1.transform(X[self._shape_columns(X)])
        X_all_scaled = np.asarray(self.scaler2.transform(X))

        # Stage 1: shape features only. (The stage-1 probabilities were
        # computed here before but never used; that call is dropped.)
        stage1_pred = self.stage1_model.predict(X_shape_scaled)
        final_pred = stage1_pred.copy()

        # Stage 2: re-check everything stage 1 called normal, in one batched
        # call instead of one predict() per row.
        needs_recheck = stage1_pred == 0
        if needs_recheck.any():
            final_pred[needs_recheck] = self.stage2_model.predict(
                X_all_scaled[needs_recheck]
            )

        return final_pred

    def predict_proba(self, X):
        """Return the hard predictions one-hot encoded as an (n, 2) array.

        Column 0 is 1 for samples predicted 0 and column 1 is 1 for samples
        predicted 1. These are not calibrated probabilities.
        """
        predictions = self.predict(X)
        proba = np.zeros((len(predictions), 2))
        proba[predictions == 0, 0] = 1
        proba[predictions == 1, 1] = 1
        return proba

## Load and Save Models

In [None]:
import pickle

def load_models(svm_path="svm_model.pkl", nb_path="nb_model.pkl"):
    """Unpickle the SVM-based and the naive-Bayes-based classifier.

    Args:
        svm_path: Pickle file of the SVM model.
        nb_path: Pickle file of the naive Bayes model.

    Returns:
        Tuple ``(svm_model, nb_model)``.
    """
    # NOTE: pickle.load executes code embedded in the file; only load model
    # files from a trusted source.
    loaded = []
    for model_file in (svm_path, nb_path):
        with open(model_file, "rb") as handle:
            loaded.append(pickle.load(handle))

    svm_model, nb_model = loaded
    print(f"[INFO] Models loaded ← {svm_path}, {nb_path}")
    return svm_model, nb_model


In [None]:
def ensure_feature_alignment(features_df, required_cols):
    """Return the features restricted to and ordered by ``required_cols``.

    Columns listed in ``required_cols`` but absent from ``features_df`` are
    added and filled with 0; columns not listed are dropped.

    Args:
        features_df: DataFrame of extracted features. It is not modified.
        required_cols: Column names in the order the model was trained on.

    Returns:
        A new DataFrame with exactly ``required_cols`` as columns.
    """
    # reindex builds a new frame, so the caller's DataFrame no longer gets
    # the missing columns written into it as a side effect.
    return features_df.reindex(columns=list(required_cols), fill_value=0)


def calculate_prediction_results(
    image_path,
    svm_model_path="svm_model.pkl",
    nb_model_path="nb_model.pkl",
    preloaded_models=None,
):
    """Predict TB for one image with both models and print the key metrics.

    Args:
        image_path: Path of the image to analyse.
        svm_model_path: Pickle of the SVM model (used when no models are
            passed in).
        nb_model_path: Pickle of the naive Bayes model (same condition).
        preloaded_models: Optional ``(svm_model, nb_model)`` pair to reuse
            instead of loading from disk.

    Returns:
        dict with the image path, the per-model and combined predictions and
        the TB probabilities in percent; None when the path is invalid,
        feature extraction fails, or a model file is missing.
    """
    path_is_usable = bool(image_path) and os.path.exists(image_path)
    if not path_is_usable:
        print(f"⚠️ Path gambar tidak valid: {image_path}")
        return None

    extracted = process_single_image(image_path)
    if extracted is None:
        print("⚠️ Gagal mengekstrak fitur dari gambar.")
        return None

    feature_row = pd.DataFrame([extracted])

    # Reuse the cached model pair when one was supplied.
    if preloaded_models is None:
        try:
            svm_model, nb_model = load_models(svm_model_path, nb_model_path)
        except FileNotFoundError as err:
            print(err)
            return None
    else:
        svm_model, nb_model = preloaded_models

    # Each model gets the columns, in the order, its stage-2 scaler was
    # fitted on.
    svm_all_cols = list(svm_model.scaler2.feature_names_in_)
    nb_all_cols = list(nb_model.scaler2.feature_names_in_)
    svm_inputs = ensure_feature_alignment(feature_row.copy(), svm_all_cols)
    nb_inputs = ensure_feature_alignment(feature_row.copy(), nb_all_cols)

    def stage2_tb_probability(model, inputs):
        # Column 1 of the stage-2 model's predict_proba output, first row.
        # NOTE(review): this comes from stage 2 alone, while the label
        # returned by predict() is hierarchical.
        scaled = model.scaler2.transform(inputs)
        return model.stage2_model.predict_proba(scaled)[0, 1]

    svm_pred = svm_model.predict(svm_inputs)[0]
    svm_tb_prob = stage2_tb_probability(svm_model, svm_inputs)

    nb_pred = nb_model.predict(nb_inputs)[0]
    nb_tb_prob = stage2_tb_probability(nb_model, nb_inputs)

    # Either model flagging TB is enough for the combined verdict.
    any_tb = svm_pred == 1 or nb_pred == 1
    label_map = {0: "Normal", 1: "TB"}

    results = {
        "image": image_path,
        "final_prediction": "TB" if any_tb else "Normal",
        "svm_prediction": label_map.get(svm_pred, svm_pred),
        "nb_prediction": label_map.get(nb_pred, nb_pred),
        "svm_tb_probability": svm_tb_prob * 100,
        "nb_tb_probability": nb_tb_prob * 100,
        "average_tb_percentage": (svm_tb_prob + nb_tb_prob) / 2 * 100,
    }

    print("\n=== TB Prediction Results ===")
    for key, value in results.items():
        shown = f"{value:.2f}" if isinstance(value, float) else value
        print(f"{key}: {shown}")

    return results


def calculate_folder_predictions(
    folder_path,
    svm_model_path="svm_model.pkl",
    nb_model_path="nb_model.pkl",
):
    """Run the single-image prediction on every image of a folder.

    Entries with a .png, .jpg or .jpeg suffix (case-insensitive) are
    processed in sorted order; the models are loaded once and shared.

    Args:
        folder_path: Folder holding the images; ``~`` is expanded.
        svm_model_path: Pickle of the SVM model.
        nb_model_path: Pickle of the naive Bayes model.

    Returns:
        DataFrame with one row per image that produced a result, or None when
        the folder is invalid, holds no images, a model file is missing, or
        no image yielded a result.
    """
    if not folder_path:
        print("⚠️ Path folder tidak boleh kosong.")
        return None

    folder = Path(folder_path).expanduser()
    if not (folder.exists() and folder.is_dir()):
        print(f"⚠️ Folder tidak ditemukan: {folder}")
        return None

    supported_ext = (".png", ".jpg", ".jpeg")
    image_files = [
        str(entry)
        for entry in folder.iterdir()
        if entry.suffix.lower() in supported_ext
    ]
    image_files.sort()

    if not image_files:
        print(f"⚠️ Tidak ada gambar (.png/.jpg) di folder {folder}")
        return None

    # Load the models once so they are not unpickled again for every image.
    try:
        models = load_models(svm_model_path, nb_model_path)
    except FileNotFoundError as err:
        print(err)
        return None

    print(f"\nMenjalankan prediksi untuk {len(image_files)} gambar di {folder}...")

    collected = []
    for img_path in image_files:
        print(f"\nProcessing: {img_path}")
        outcome = calculate_prediction_results(img_path, preloaded_models=models)
        if outcome is None:
            continue
        collected.append(outcome)

    if not collected:
        print("⚠️ Tidak ada hasil prediksi yang valid.")
        return None

    results_df = pd.DataFrame(collected)
    print("\n=== Ringkasan Prediksi Folder ===")
    summary = results_df[["image", "final_prediction", "average_tb_percentage"]]
    print(summary.to_string(index=False))
    return results_df


## Main Function

In [None]:
def main():
    """Ask for a folder and a single image path, then run the matching inference.

    Both prompts are always shown. A non-empty folder answer takes
    precedence and triggers batch inference; otherwise a non-empty image
    answer triggers single-image inference; with both empty a hint is
    printed.
    """
    print("=== KONFIGURASI INPUT USER ===")
    folder_answer = input(
        "Masukkan folder yang berisi gambar untuk batch inference (Enter untuk skip): "
    ).strip()
    image_answer = input(
        "Masukkan path gambar tunggal untuk dianalisis (Enter untuk skip): "
    ).strip()

    if folder_answer:
        calculate_folder_predictions(folder_answer)
        return

    if image_answer:
        # Expand a leading ~ before handing the path on as a plain string.
        expanded = Path(image_answer).expanduser()
        calculate_prediction_results(str(expanded))
        return

    print(
        "Isi path folder atau gambar tunggal agar prediksi dapat dijalankan."
    )


if __name__ == "__main__":
    main()
