In [None]:
from PIL import Image
import numpy as np 
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import os

from skimage.feature import local_binary_pattern,  graycomatrix, graycoprops, hog
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
# Dataset root folder. The helpers in this notebook join sub-folder names
# ('image', 'mask', 'preprocessed_img') onto the folder path they are given,
# and the cells below pass this value as that folder path.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
path = "C:\\Users\\Muhammad Ammar M\\Documents\\Kuylah S2\\Pengolahan dan Analisis Citra Digital\\PACD_Assignment\\small_dataset\\ChestXRay\\ChestXRay"

In [None]:
# Read the metadata table and discard the columns that are not used by the
# feature-extraction step; the last expression previews the first rows.
data = (
    pd.read_csv(
        "C:\\Users\\Muhammad Ammar M\\Documents\\Kuylah S2\\Pengolahan dan Analisis Citra Digital\\PACD_Assignment\\small_dataset\\MetaData.csv",
        index_col=False,
    )
    .drop(columns=["gender", "age", "county", "remarks"])
)
data.head()

In [None]:
class utils():
    """Stateless helpers for inspecting images and validating the dataset folder.

    Every helper is a static method, so they can be called as ``utils.name(...)``
    without creating an instance.
    """

    def __init__(self):
        pass

    @staticmethod
    def display_image(title, image):
        """Show ``image`` in an OpenCV window named ``title`` until a key is pressed."""
        cv2.imshow(title, image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    @staticmethod
    def display_histogram(image):
        """Plot a 256-bin histogram of all values in ``image`` with matplotlib."""
        plt.hist(np.array(image).flatten(), bins=256, color='gray')
        plt.title('Histogram')
        plt.xlabel('Intensitas')
        plt.ylabel('Frekuensi')
        plt.show()

    @staticmethod
    def convert_data_to_gray(path):
        """Convert every readable image in ``<path>/image`` to grayscale, in place.

        Each file is read with OpenCV, converted with ``COLOR_BGR2GRAY`` and
        written back to the same path, so the original file is overwritten.
        Files OpenCV cannot decode are reported and left untouched. Nothing
        happens when the ``image`` folder does not exist.
        """
        folder_path = os.path.join(path, 'image')

        if not os.path.isdir(folder_path):
            return

        for file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, file)

            if not os.path.isfile(img_path):
                continue

            img = cv2.imread(img_path)
            if img is None:
                print(f"{file} : None")
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            cv2.imwrite(img_path, gray)

    @staticmethod
    def dataset_check(path):
        """Check that every readable image in ``<path>/image`` is single-channel.

        Returns:
            1 when no multi-channel image was found, 0 otherwise. 0 is also
            returned when the ``image`` folder does not exist, so a missing
            dataset is never reported as ready.

        Entries that cannot be opened as an image are reported and skipped.
        """
        folder_path = os.path.join(path, 'image')

        if not os.path.isdir(folder_path):
            print("Folder Kosong")
            return 0

        for file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, file)

            # Image.open raises (an OSError subclass) for unreadable entries
            # instead of returning None, so the failure is caught here. The
            # context manager releases the file handle once the pixels are copied.
            try:
                with Image.open(img_path) as pil_img:
                    img = np.array(pil_img)
            except OSError:
                print(f"{file}: Cannot read file")
                continue

            # Check the number of channels.
            if img.ndim == 2:
                continue

            if img.ndim == 3 and img.shape[2] == 3:
                print(img.shape)
                print(f"{img_path} has 3 dimenstion")
                return 0

            # Any other layout (e.g. a different channel count) is rejected too.
            print(f"{img_path} has unsupported shape {img.shape}")
            return 0

        return 1

class preprocess():
    """Mask, crop, resize and enhance images before feature extraction.

    The sub-folder names used below the dataset folder are stored on the
    instance: ``image`` (inputs), ``mask`` (one mask per image, same file name)
    and ``preprocessed_img`` (outputs).
    """

    def __init__(self):
        self.preprocessed_folder = 'preprocessed_img'
        self.image_folder = 'image'
        self.mask_folder = 'mask'

    def apply_mask(self, image, mask):
        """Zero out every pixel of ``image`` where ``mask`` is not positive."""
        mask_bin = (mask > 0).astype(np.uint8)
        return image * mask_bin

    def crop(self, image, mask):
        """Crop ``image`` to the bounding box of the positive pixels of ``mask``.

        The box is inclusive on both ends, so the last mask row and column are
        part of the result and a single-pixel mask yields a 1x1 crop.

        Raises:
            ValueError: if ``mask`` has no positive pixel.
        """
        ys, xs = np.where(mask > 0)

        if len(xs) == 0 or len(ys) == 0:
            raise ValueError("Mask tidak valid")

        xmin, xmax = np.min(xs), np.max(xs)
        ymin, ymax = np.min(ys), np.max(ys)

        # Slice ends are exclusive, hence the +1 to keep the max row/column.
        return image[ymin:ymax + 1, xmin:xmax + 1]

    def resize(self, image, target=(512, 512)):
        """Scale ``image`` to fit inside ``target`` and zero-pad it to that size.

        Args:
            image: array whose first two axes are (height, width).
            target: ``(width, height)`` of the output.

        The aspect ratio is preserved; the remaining space is filled with a
        constant 0 border split as evenly as possible between both sides.
        """
        h, w = image.shape[:2]
        target_w, target_h = target

        scale = min(target_w / w, target_h / h)
        # Never let a side collapse to 0 pixels for very elongated inputs.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))

        resized = cv2.resize(image, (new_w, new_h))

        pad_x = target_w - new_w
        pad_y = target_h - new_h

        return cv2.copyMakeBorder(
            resized,
            pad_y // 2, pad_y - pad_y // 2,
            pad_x // 2, pad_x - pad_x // 2,
            cv2.BORDER_CONSTANT,
            value=0
        )

    def clahe(self, image, clip_limit=5, brightness=30):
        """Apply CLAHE, then brighten by ``brightness`` with saturation at 0..255.

        The offset is added in a wider integer type: adding it directly to the
        uint8 result would wrap values above ``255 - brightness`` around to
        small numbers before any clipping could happen.
        """
        equalizer = cv2.createCLAHE(clipLimit=clip_limit)
        equalized = equalizer.apply(image)
        brightened = equalized.astype(np.int32) + brightness
        return np.clip(brightened, 0, 255).astype(np.uint8)

    def median_blur(self, image):
        """Return ``image`` filtered with a 5x5 median blur."""
        return cv2.medianBlur(image, 5)

    def preprocessing(self, folder_path):
        """Run the full pipeline on every file of ``<folder_path>/image``.

        For each image the mask with the same file name is applied, the image is
        cropped to the mask bounding box, resized/padded to 1024x1024, enhanced
        with CLAHE, median-blurred and written to
        ``<folder_path>/preprocessed_img`` under the same file name.

        Images without a matching mask file are reported and skipped.
        NOTE(review): ``clahe`` is applied to the array as loaded, so the inputs
        are presumably single-channel — confirm against the dataset.

        Raises:
            ValueError: propagated from ``crop`` when a mask has no positive pixel.
        """
        # List the content of the dataset folder.
        for file in os.listdir(folder_path):
            print(file)

        processed_image_folder = os.path.join(folder_path, self.preprocessed_folder)

        if not os.path.exists(processed_image_folder):
            os.makedirs(processed_image_folder)
            print(f"Created {processed_image_folder}")

        image_folder = os.path.join(folder_path, self.image_folder)
        mask_folder = os.path.join(folder_path, self.mask_folder)

        if not os.path.isdir(image_folder):
            print("Folder image tidak ditemukan!")
            return

        for file in os.listdir(image_folder):
            img_path = os.path.join(image_folder, file)
            mask_path = os.path.join(mask_folder, file)

            if not os.path.isfile(img_path):
                continue

            if not os.path.isfile(mask_path):
                print(f"Mask not found, skipping: {mask_path}")
                continue

            # Copy the pixels out and close the files right away.
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img)
            with Image.open(mask_path) as pil_mask:
                mask = np.array(pil_mask)

            masked_image = self.apply_mask(img, mask)
            cropped_image = self.crop(masked_image, mask)
            resized_image = self.resize(cropped_image, (1024, 1024))
            clahe_result = self.clahe(resized_image)
            median_blur_result = self.median_blur(clahe_result)

            # The blurred result is already uint8, so it is written as is; a
            # round-trip through float32 [0, 1] adds nothing to the saved file.
            output_path = os.path.join(processed_image_folder, file)
            cv2.imwrite(output_path, median_blur_result)

class featureExtract():
    """Extract LBP, GLCM and PCA-reduced HOG features for the images of a table.

    Images are read from ``<folder_path>/preprocessed_img/<id>.png`` where
    ``id`` comes from the ``id`` column of the DataFrame.
    """

    def __init__(self):
        self.preprocessed_folder = 'preprocessed_img'
        # LBP parameters
        self.radius = 2
        self.n_points = 8 * self.radius

        # HOG: maximum number of leading HOG values kept per image
        self.hog_dim = 5000

    def lbp(self, image):
        """Return the normalised histogram of the uniform LBP codes of ``image``.

        The histogram has ``n_points + 2`` bins and sums to 1 (it is left as all
        zeros if the image has no pixels).
        """
        codes = local_binary_pattern(image, self.n_points, self.radius, method='uniform')
        n_bins = self.n_points + 2
        hist, _ = np.histogram(
            codes.ravel(),
            bins=np.arange(0, n_bins + 1),
            range=(0, n_bins)
        )
        hist = hist.astype(float)
        total = hist.sum()
        if total > 0:
            hist /= total
        return hist

    def glcm(self, image):
        """Return six GLCM properties (distance 1, angle 0) as a 1-D array.

        Order: contrast, dissimilarity, homogeneity, energy, correlation, ASM.
        """
        glcm = graycomatrix(image, [1], [0], symmetric=True, normed=True)
        props = ["contrast", "dissimilarity", "homogeneity",
                 "energy", "correlation", "ASM"]
        features = [graycoprops(glcm, p)[0, 0] for p in props]
        return np.array(features)

    def hog(self, image):
        """Return the flattened HOG descriptor of ``image``."""
        # Inside the method body ``hog`` resolves to the imported skimage
        # function, not to this method.
        hog_features = hog(image,
                           orientations=9,
                           pixels_per_cell=(16, 16),
                           cells_per_block=(2, 2),
                           block_norm="L2-Hys",
                           feature_vector=True)
        return hog_features

    def pca(self, hog, n_components=15):
        """Project the stacked HOG vectors onto their first principal components.

        Args:
            hog: sequence of equally long HOG vectors, one per image.
            n_components: requested number of components; capped at the number
                of samples and features so small datasets do not fail.

        Returns:
            Array of shape ``(n_samples, n_components_used)``.
        """
        hog_matrix = np.asarray(hog)
        n_used = min(n_components, *hog_matrix.shape)
        pca = PCA(n_components=n_used)
        return pca.fit_transform(hog_matrix)

    def feature_extract(self, df, folder_path):
        """Return a copy of ``df`` extended with LBP, GLCM and HOG-PCA columns.

        Args:
            df: table with an ``id`` column naming the image files.
            folder_path: dataset folder that contains ``preprocessed_img``.

        Returns:
            New DataFrame with the same index as ``df``. ``lbp_*`` and
            ``glcm_*`` columns that did not exist yet are created with 0.0 and
            filled for every readable image; ``pca<i>`` columns hold the
            PCA-reduced HOG features and are NaN for rows whose image could not
            be read. The input DataFrame is not modified.

        Raises:
            ValueError: if no image could be read at all.
        """
        df = df.copy()
        image_folder = os.path.join(folder_path, self.preprocessed_folder)
        print("Membaca folder:", image_folder)

        # Add the LBP columns (if they do not exist yet).
        lbp_len = self.n_points + 2
        lbp_cols = [f"lbp_{i}" for i in range(lbp_len)]

        # GLCM columns.
        glcm_cols = ["glcm_contrast", "glcm_dissimilarity", "glcm_homogeneity",
                     "glcm_energy", "glcm_correlation", "glcm_ASM"]

        for col in lbp_cols + glcm_cols:
            if col not in df.columns:
                df[col] = 0.0

        hog_data = []
        hog_rows = []  # index labels of the rows that produced a HOG vector

        # Loop over the images.
        for idx, row in df.iterrows():
            img_name = str(int(row["id"])) + ".png"
            img_path = os.path.join(image_folder, img_name)

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print("Gambar tidak ditemukan:", img_path)
                continue

            # --- LBP ---
            lbp_result = self.lbp(img)
            for col, value in zip(lbp_cols, lbp_result):
                df.at[idx, col] = value

            # --- GLCM ---
            glcm_feat = self.glcm(img)
            for col, value in zip(glcm_cols, glcm_feat):
                df.at[idx, col] = value

            # --- HOG (keep at most the first ``hog_dim`` values) ---
            hog_feat = self.hog(img)
            hog_data.append(hog_feat[:self.hog_dim])
            hog_rows.append(idx)

        if not hog_data:
            raise ValueError(f"No readable image found in {image_folder}")

        hog_pca = self.pca(hog_data)

        # One column per principal component (axis 1), and one row per image
        # that was actually processed, labelled with that image's row index so
        # skipped images cannot shift the PCA rows.
        df_pca = pd.DataFrame(
            hog_pca,
            columns=[f"pca{i}" for i in range(hog_pca.shape[1])],
            index=hog_rows,
        )
        df = pd.concat([df, df_pca.reindex(df.index)], axis=1)

        return df

class featureExtract2():
    """Extract LBP, GLCM and PCA-reduced HOG features for the images of a table.

    Images are read from ``<folder_path>/preprocessed_img/<id>.png`` where
    ``id`` comes from the ``id`` column of the DataFrame.

    Args:
        hog_dim: maximum number of leading HOG values kept per image.
        pca_components: requested number of principal components for HOG.
    """

    def __init__(self, hog_dim=5000, pca_components=15):
        self.preprocessed_folder = "preprocessed_img"

        # LBP parameters
        self.radius = 2
        self.n_points = 8 * self.radius
        self.lbp_len = self.n_points + 2

        # HOG parameters
        self.hog_dim = hog_dim
        self.pca_components = pca_components

        # GLCM columns
        self.glcm_cols = [
            "glcm_contrast", "glcm_dissimilarity", "glcm_homogeneity",
            "glcm_energy", "glcm_correlation", "glcm_ASM"
        ]

    # ---------------- LBP -----------------
    def lbp(self, image):
        """Return the normalised histogram of the uniform LBP codes of ``image``.

        The histogram has ``lbp_len`` bins and sums to 1 (it is left as all
        zeros if the image has no pixels).
        """
        codes = local_binary_pattern(image, self.n_points, self.radius, method='uniform')
        hist, _ = np.histogram(
            codes.ravel(),
            bins=np.arange(0, self.lbp_len + 1),
            range=(0, self.lbp_len)
        )
        hist = hist.astype(float)
        total = hist.sum()
        if total > 0:
            hist /= total
        return hist

    # ---------------- GLCM ----------------
    def glcm(self, image):
        """Return six GLCM properties (distance 1, angle 0) as a 1-D array.

        Order: contrast, dissimilarity, homogeneity, energy, correlation, ASM.
        """
        glcm = graycomatrix(image, [1], [0], symmetric=True, normed=True)
        props = ["contrast", "dissimilarity", "homogeneity",
                 "energy", "correlation", "ASM"]

        features = [graycoprops(glcm, p)[0, 0] for p in props]
        return np.array(features)

    # ---------------- HOG -----------------
    def hog_extract(self, image):
        """Return at most the first ``hog_dim`` values of the HOG descriptor."""
        feat = hog(
            image,
            orientations=9,
            pixels_per_cell=(16, 16),
            cells_per_block=(2, 2),
            block_norm="L2-Hys",
            feature_vector=True,
        )
        return feat[:self.hog_dim]

    # ---------------- PCA -----------------
    def apply_pca(self, hog_features):
        """Project the stacked HOG vectors onto their first principal components.

        The number of components is ``pca_components`` capped at the number of
        samples and features, so small datasets do not fail.

        Returns:
            Array of shape ``(n_samples, n_components_used)``.
        """
        hog_matrix = np.asarray(hog_features)
        n_used = min(self.pca_components, *hog_matrix.shape)
        pca = PCA(n_components=n_used)
        return pca.fit_transform(hog_matrix)

    # ---------------- Main Feature Extract -----------------
    def feature_extract(self, df, folder_path):
        """Return a new table with LBP, GLCM and HOG-PCA columns added to ``df``.

        Args:
            df: table with an ``id`` column naming the image files.
            folder_path: dataset folder that contains ``preprocessed_img``.

        Returns:
            New DataFrame with a fresh 0..n-1 index. ``lbp_*`` and ``glcm_*``
            columns are (re)created with 0.0 and filled for every readable
            image; ``pca_<i>`` columns hold the PCA-reduced HOG features and are
            NaN for rows whose image could not be read. The input DataFrame is
            not modified.

        Raises:
            ValueError: if no image could be read at all.
        """
        # Work on a re-indexed copy: the caller's frame stays untouched and the
        # row labels are the positions used to align the PCA rows below.
        df = df.reset_index(drop=True)

        hog_list = []
        hog_rows = []  # row labels of the images that produced a HOG vector

        # Prepare the LBP columns.
        lbp_cols = [f"lbp_{i}" for i in range(self.lbp_len)]
        for col in lbp_cols:
            df[col] = 0.0

        # Prepare the GLCM columns.
        for col in self.glcm_cols:
            df[col] = 0.0

        image_folder = os.path.join(folder_path, self.preprocessed_folder)

        # Loop over every data row.
        for idx, row in df.iterrows():
            img_path = os.path.join(image_folder, f"{int(row['id'])}.png")
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            if img is None:
                print("Gambar tidak ditemukan:", img_path)
                continue

            # ---- LBP ----
            lbp_feat = self.lbp(img)
            for col, value in zip(lbp_cols, lbp_feat):
                df.at[idx, col] = value

            # ---- GLCM ----
            glcm_feat = self.glcm(img)
            for col, value in zip(self.glcm_cols, glcm_feat):
                df.at[idx, col] = value

            # ---- HOG ----
            hog_list.append(self.hog_extract(img))
            hog_rows.append(idx)

        if not hog_list:
            raise ValueError(f"No readable image found in {image_folder}")

        # ---- PCA for HOG only ----
        hog_pca = self.apply_pca(hog_list)

        # Build the PCA columns: one per component actually returned, one row
        # per processed image, labelled with that image's row so skipped images
        # cannot shift the PCA rows.
        pca_df = pd.DataFrame(
            hog_pca,
            columns=[f"pca_{i}" for i in range(hog_pca.shape[1])],
            index=hog_rows,
        )

        # Attach the PCA columns to the table.
        df = pd.concat([df, pca_df.reindex(df.index)], axis=1)

        return df


# ---------------- NORMALISASI ----------------
def normalize_features(df, label_col=None):
    """Standardise the feature columns of ``df`` with a ``StandardScaler``.

    Args:
        df: table whose columns (except ``label_col``) are passed to the scaler.
        label_col: optional name of a column that is excluded from scaling and
            copied unchanged into the result.

    Returns:
        ``(df_scaled, scaler)``: the scaled table, carrying the same index as
        ``df``, and the fitted scaler.
    """
    scaler = StandardScaler()

    if label_col:
        labels = df[label_col]
        features = df.drop(columns=[label_col])
    else:
        features = df
        labels = None

    scaled = scaler.fit_transform(features)
    # Keep the original index: column assignment aligns on index labels, so a
    # fresh 0..n-1 index would turn the labels into NaN for any other index.
    df_scaled = pd.DataFrame(scaled, columns=features.columns, index=features.index)

    if labels is not None:
        df_scaled[label_col] = labels

    return df_scaled, scaler



In [None]:
# utils.convert_data_to_gray(path)
# ok = utils.dataset_check(path)
# if ok:
#     print("Dataset ready to use")
# else:
#     print("Dataset unready")

In [None]:
# prep = preprocess()
# prep.preprocessing(path)

In [None]:
# Compute the LBP, GLCM and PCA-reduced HOG features for every row of the
# metadata table, then store the resulting table as CSV without the index.
fe = featureExtract2()
result = fe.feature_extract(df=data, folder_path=path)
result.to_csv("fitur.csv", index=False)

In [None]:
# Load image
# img = Image.open("C:\\Users\\Muhammad Ammar M\\Documents\\Kuylah S2\\Pengolahan dan Analisis Citra Digital\\PACD_Assignment\\small dataset\\ChestXRay\\ChestXRay\\preprocessed_img\\1000.png")
# img = np.array(img)

# Inspeksi Gambar
# utils.display_image("Test", img)
# utils.display_histogram(img)
# print(img.shape)