In [1]:
import tensorflow as tf
import numpy as np
import cv2
import pathlib
import os
import pandas as pd

In [10]:
# Dataset location and image-loading parameters
data_dir = pathlib.Path(r"C:\Users\PC\Documents\innar\data akhir valid merah")
# Alternative dataset location kept for reference:
# data_dir = pathlib.Path(r"D:\SKRIPSI\kode\data akhir valid merah")
img_height = img_width = 224
batch_size = 32

# Load the images; labels are inferred from the sub-folder names and encoded
# as integer indices into train_ds.class_names.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    image_size=(img_height, img_width),
    color_mode="rgb",
    labels="inferred",
    label_mode="int",
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

print(train_ds.class_names)


Found 507 files belonging to 10 classes.


['0.5', '1', '1.5', '2', '2.5', '3', '3.5', '4', '5', '6']


In [None]:
# Helpers for mapping integer class indices to float labels (e.g. folder '2.5' becomes label 2.5)
def is_float_folder(name):
    """Return True when ``name`` can be parsed by ``float()``, else False.

    Only ``ValueError`` is treated as "not a float"; any other exception
    raised by ``float(name)`` propagates to the caller.
    """
    try:
        float(name)
    except ValueError:
        return False
    else:
        return True

# Names of the sub-folders of data_dir that parse as floats, in sorted (string) order.
label_names = sorted(
    entry.name
    for entry in data_dir.iterdir()
    if entry.is_dir() and is_float_folder(entry.name)
)
# Integer class index (position in train_ds.class_names) -> float label value.
label_mapping = dict(enumerate(map(float, train_ds.class_names)))

# CLAHE contrast enhancement
def apply_clahe_np(img):
    """Apply CLAHE to an image given as floats in the range [0, 1].

    Args:
        img: array of shape (H, W), (H, W, 1) or (H, W, 3). Three-channel
            input is interpreted as RGB.

    Returns:
        float32 array scaled to [0, 1]; shape (H, W) for single-channel
        input and (H, W, 3) for RGB input.

    Raises:
        ValueError: if ``img`` has any other shape.
    """
    # Clip before the uint8 cast so out-of-range values cannot wrap around.
    img_u8 = np.clip(np.asarray(img) * 255, 0, 255).astype(np.uint8)
    if img_u8.ndim == 3 and img_u8.shape[-1] == 1:
        img_u8 = np.ascontiguousarray(img_u8[..., 0])

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    if img_u8.ndim == 2:
        enhanced = clahe.apply(img_u8)
    elif img_u8.ndim == 3 and img_u8.shape[-1] == 3:
        # CLAHE.apply only accepts single-channel images, so equalise the
        # lightness channel in LAB space and keep the colour channels as-is.
        lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img_u8, cv2.COLOR_RGB2LAB))
        lab = cv2.merge((clahe.apply(lightness), chan_a, chan_b))
        enhanced = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
    else:
        raise ValueError(
            f"apply_clahe_np expects shape (H, W), (H, W, 1) or (H, W, 3); got {img_u8.shape}"
        )
    return enhanced.astype(np.float32) / 255.0

# Data augmentation
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1)
])

# Dataset, one image per batch and unshuffled.
# NOTE: this rebinds train_ds, replacing the batched/shuffled dataset loaded earlier.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="rgb",  # must be lowercase: Keras accepts "grayscale", "rgb" or "rgba"
    image_size=(img_height, img_width),
    batch_size=1,
    shuffle=False  # keep a fixed order so items line up with file_paths
)

# File paths in the same order as the (unshuffled) dataset
file_paths = list(train_ds.file_paths)

# Rows of the tabular result
data_list = []

# Process the dataset one image at a time
for (img_batch, label_batch), file_path in zip(train_ds, file_paths):
    img = img_batch[0].numpy() / 255.0  # normalise to [0, 1]
    clahe_img = apply_clahe_np(img)
    if clahe_img.ndim == 2:
        # Single-channel result: restore the channel axis expected by the augmentation layers.
        clahe_img = clahe_img[..., np.newaxis]
    # training=True makes the random layers active outside of model.fit().
    # NOTE: aug_img / clahe_img are computed but not stored in the table below.
    aug_img = data_augmentation(
        tf.convert_to_tensor(clahe_img[None, ...]), training=True
    )[0].numpy()

    # Float label looked up from the integer class index
    label_int = int(label_batch[0].numpy())
    label_ppb = label_mapping[label_int]

    # Original file name
    file_name = os.path.basename(file_path)

    data_list.append({
        "filename": file_name,
        "label_ppb": label_ppb,
        # "image_array": img  # or use clahe_img / aug_img to store a processed image instead
    })

# Convert to a DataFrame
df = pd.DataFrame(data_list)

# Show a summary
print(df.head())

Found 507 files belonging to 10 classes.
      filename  label_ppb
0   m0,5-1.png        0.5
1  m0,5-10.png        0.5
2  m0,5-11.png        0.5
3  m0,5-12.png        0.5
4  m0,5-13.png        0.5
      filename  label_ppb
0   m0,5-1.png        0.5
1  m0,5-10.png        0.5
2  m0,5-11.png        0.5
3  m0,5-12.png        0.5
4  m0,5-13.png        0.5


In [12]:
# ResNet50 backbone without the top classification layers (include_top=False)
# and with no pretrained weights loaded (weights=None).
model = tf.keras.applications.ResNet50(
    weights=None,
    include_top=False,
    input_shape=(224, 224, 3),  # input MUST be RGB (3 channels)
)

In [13]:
# Data split script: 70% train, 20% validation, 10% test
import shutil
from sklearn.model_selection import train_test_split

def split_data_folder(source_dir, dest_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    """Copy a class-per-folder PNG dataset into train/val/test folders.

    Every sub-directory of ``source_dir`` is treated as one class. Its
    ``*.png`` files are split per class and copied to
    ``dest_dir/<split>/<class name>/``. Source files are left untouched, and
    files already present under ``dest_dir`` are NOT removed, so re-running
    with different ratios can leave the same image in more than one split.

    Args:
        source_dir: directory containing one sub-directory per class.
        dest_dir: output root; created if missing.
        train_ratio: fraction of each class that goes to ``train``.
        val_ratio, test_ratio: relative shares used to divide the remaining
            files between ``val`` and ``test``.

    Raises:
        ValueError: if ``val_ratio + test_ratio`` is not positive. May also be
            raised by ``train_test_split`` when the ratios would produce an
            empty partition for a class with two or more files.
    """
    source_dir = pathlib.Path(source_dir)
    dest_dir = pathlib.Path(dest_dir)

    holdout_ratio = val_ratio + test_ratio
    if holdout_ratio <= 0:
        raise ValueError("val_ratio + test_ratio must be greater than 0")
    # Share of the hold-out files that goes to validation (same for every class).
    val_share = val_ratio / holdout_ratio

    dest_dir.mkdir(parents=True, exist_ok=True)

    for class_folder in sorted(source_dir.iterdir()):
        if not class_folder.is_dir():
            continue
        # glob() order depends on the filesystem; sort so that the seeded
        # split is reproducible across machines and runs.
        files = sorted(class_folder.glob('*.png'))
        if not files:
            continue

        if len(files) < 2:
            # A single file cannot be split; keep it for training.
            train_files, temp_files = files, []
        else:
            train_files, temp_files = train_test_split(
                files, train_size=train_ratio, random_state=42
            )

        if len(temp_files) >= 2 and 0 < val_share < 1:
            val_files, test_files = train_test_split(
                temp_files, train_size=val_share, random_state=42
            )
        elif val_share > 0:
            # Too few hold-out files to split (or test_ratio == 0): all go to val.
            val_files, test_files = temp_files, []
        else:
            # val_ratio == 0: all hold-out files go to test.
            val_files, test_files = [], temp_files

        for split_name, split_files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
            split_class_dir = dest_dir / split_name / class_folder.name
            split_class_dir.mkdir(parents=True, exist_ok=True)
            for f in split_files:
                shutil.copy(str(f), str(split_class_dir / f.name))

# Example usage:
split_data_folder(
    r'C:\Users\PC\Documents\innar\data akhir valid merah',
    r'C:\Users\PC\Documents\innar\data_split_merah',
    train_ratio=0.7,
    val_ratio=0.2,
    test_ratio=0.1,
)