In [10]:
import tensorflow as tf
import numpy as np
import cv2
import pathlib
import os
import pandas as pd

In [16]:
# Root folder that holds one sub-folder per class; adjust to your local path.
data_dir = pathlib.Path(r"C:\Users\PC\Documents\innar\data")

# Image size requested from the loader and number of images per batch.
img_height = img_width = 224
batch_size = 32

# Read the dataset; integer labels are inferred from the sub-folder names.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="grayscale",
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=True,
    seed=42,
)

# Show the class names the loader discovered (index in this list == int label).
print(train_ds.class_names)


Found 232 files belonging to 11 classes.


['0.5', '1', '1.5', '2', '2.5', '3', '3.5', '4', '5', '6', '7']


In [19]:
# Build the mapping from integer class index to float label (e.g. folder '2.5' becomes label 2.5)
def is_float_folder(name):
    """Return True when ``name`` parses as a finite floating-point number.

    Used to recognise folders whose name encodes a numeric label
    (for example ``"2.5"``).

    Args:
        name: Candidate folder name, normally a ``str``.

    Returns:
        bool: True if ``float(name)`` succeeds and the result is finite.
        False for anything else, including ``"nan"``, ``"inf"`` and
        non-string input such as ``None``.
    """
    try:
        value = float(name)
    except (TypeError, ValueError):
        # TypeError covers inputs like None that float() cannot convert at all.
        return False
    # float() happily parses "nan", "inf" and "infinity"; none of those is a
    # usable numeric label, so reject non-finite results explicitly.
    return bool(np.isfinite(value))

# Names of the numeric-looking sub-folders of data_dir, in lexicographic order.
label_names = sorted(
    entry.name
    for entry in data_dir.iterdir()
    if entry.is_dir() and is_float_folder(entry.name)
)
# Integer class index (position in train_ds.class_names) -> float value of that class name.
label_mapping = dict(enumerate(map(float, train_ds.class_names)))

# Enhancement CLAHE
def apply_clahe_np(img, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Apply CLAHE contrast enhancement to a float image scaled to [0, 1].

    The image is converted to 8-bit, passed through OpenCV's CLAHE and
    converted back to float32 by dividing by 255.

    Args:
        img: Array-like of intensities expected to lie in [0, 1].
            NOTE(review): OpenCV's CLAHE expects a single-channel image;
            the visible caller passes a squeezed grayscale array.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        np.ndarray: float32 array holding the CLAHE output divided by 255.
    """
    # Round and clip before the cast: a bare astype(np.uint8) truncates toward
    # zero (99.99999 -> 99, a one-level error caused by float rounding) and the
    # result for values outside 0..255 is not well defined.
    scaled = np.asarray(img) * 255
    img_u8 = np.clip(np.rint(scaled), 0, 255).astype(np.uint8)

    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    enhanced = clahe.apply(img_u8)
    return enhanced.astype(np.float32) / 255.0

# Data augmentation
# A fixed base seed makes the random transforms repeatable from a fresh
# kernel. Each layer gets its own seed so the layers do not draw from
# identically-seeded random streams.
AUGMENTATION_SEED = 42
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal", seed=AUGMENTATION_SEED),
    tf.keras.layers.RandomRotation(0.05, seed=AUGMENTATION_SEED + 1),
    tf.keras.layers.RandomZoom(0.1, seed=AUGMENTATION_SEED + 2),
    tf.keras.layers.RandomContrast(0.1, seed=AUGMENTATION_SEED + 3),
])

# Re-load the dataset one image per batch and unshuffled, so that items can be
# matched to file names by position.
# NOTE(review): this rebinds `train_ds`, replacing the shuffled batch_size=32
# dataset created in the earlier cell; any later cell that uses `train_ds`
# gets this batch_size=1, unshuffled dataset instead. Confirm that is intended.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="grayscale",
    image_size=(img_height, img_width),
    batch_size=1,
    shuffle=False  # keep a fixed order so items can be matched to file names
)

# File paths taken from the dataset object.
file_paths = list(train_ds.file_paths)

# Accumulates one record (dict) per image for the tabular result.
data_list = []

# Visit every dataset item; each batch holds a single image and label.
for i, (img_batch, label_batch) in enumerate(train_ds):
    img = img_batch[0].numpy().squeeze() / 255.0  # scale by 1/255 (assumes pixel values in 0..255)
    clahe_img = apply_clahe_np(img)
    # Append a trailing axis to the CLAHE result.
    clahe_img = np.expand_dims(clahe_img, axis=-1)
    # Add a leading batch axis, run the augmentation pipeline, take the first
    # item back out. NOTE(review): aug_img is not stored in data_list below.
    aug_img = data_augmentation(tf.convert_to_tensor(clahe_img[None, ...]))[0].numpy().squeeze()
    
    # Float label looked up from the integer class index.
    label_int = int(label_batch[0].numpy())
    label_ppb = label_mapping[label_int]
    
    # Original file name; relies on file_paths[i] corresponding to the i-th
    # dataset item (the reason shuffle=False is used above).
    file_name = os.path.basename(file_paths[i])

    data_list.append({
        "filename": file_name,
        "label_ppb": label_ppb,
        # "image_array": img  # or replace with img / clahe_img to store a different result
    })

# Convert the collected records to a DataFrame.
df = pd.DataFrame(data_list)

# Show a summary (first rows).
print(df.head())

Found 232 files belonging to 11 classes.


      filename  label_ppb
0  0,5.10m.png        0.5
1  0,5.11m.png        0.5
2  0,5.12m.png        0.5
3  0,5.13m.png        0.5
4  0,5.14m.png        0.5
