In [35]:
import tensorflow as tf
import pathlib
from tensorflow.keras.applications.resnet50 import preprocess_input

In [36]:
# Dataset location and loading parameters.
base_dir = pathlib.Path()
data_split_dir = base_dir.joinpath("dataset_final")

# Square input resolution used when resizing images at load time.
img_height = img_width = 224
batch_size = 32
seed = 42
# Let tf.data pick parallelism / prefetch depth dynamically.
autotune = tf.data.AUTOTUNE

In [37]:
# --- MAIN HEADER ---
print("==================PROSES PEMUATAN DATASET==================")


def _load_split(split_name, shuffle):
    """Load one split folder under ``data_split_dir`` as a batched image dataset.

    Args:
        split_name: Sub-directory name ('train', 'val' or 'test').
        shuffle: Whether the loader should shuffle the files.

    Returns:
        The dataset returned by ``image_dataset_from_directory``; labels are
        integer class indices inferred from the sub-folder names.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        data_split_dir / split_name,
        labels="inferred",
        label_mode="int",
        color_mode="rgb",
        image_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
    )


# 1. Load the training split (shuffled).
print(f"\n[1/3] Memuat {data_split_dir / 'train'}")
train_ds = _load_split("train", shuffle=True)

# 2. Load the validation split (fixed order).
print(f"\n[2/3] Memuat {data_split_dir / 'val'}")
val_ds = _load_split("val", shuffle=False)

# 3. Load the test split (fixed order).
print(f"\n[3/3] Memuat {data_split_dir / 'test'}")
test_ds = _load_split("test", shuffle=False)

# --- Class information & mapping ---
class_names = train_ds.class_names

# Each split infers its class indices independently from its own folders.
# If a split is missing (or has an extra) class folder, its integer labels
# would no longer line up with the training indices, so fail loudly here.
for split_name, split_ds in (("val", val_ds), ("test", test_ds)):
    if split_ds.class_names != class_names:
        raise ValueError(
            f"Kelas pada split '{split_name}' ({split_ds.class_names}) "
            f"berbeda dari train ({class_names})."
        )

print("\n==================INFORMASI KELAS & MAPPING==================")
print(f"Kelas ditemukan: {class_names}")

# Map class index -> PPB value by parsing the folder name as a number.
try:
    class_to_ppb = {idx: float(name) for idx, name in enumerate(class_names)}
except ValueError:
    # float() raises ValueError for non-numeric folder names; fall back to
    # 1-based class indices so the notebook can still run.
    print("\n\u26A0 Warning: Nama folder bukan angka. Menggunakan index sebagai fallback.")
    class_to_ppb = {idx: float(idx + 1) for idx in range(len(class_names))}

# Print the mapping as an aligned table.
print(f"\n{'Index Model':<15} | {'Nama Folder':<15} | {'Nilai PPB (Target)':<20}")
print("-" * 60)
for idx, name in enumerate(class_names):
    ppb_val = class_to_ppb[idx]
    print(f"{idx:<15} | {name:<15} | {ppb_val:<20}")

print("\nSelesai. Dataset siap digunakan.")


[1/3] Memuat dataset_final\train
Found 1400 files belonging to 4 classes.

[2/3] Memuat dataset_final\val
Found 87 files belonging to 4 classes.

[2/3] Memuat dataset_final\val
Found 87 files belonging to 4 classes.

[3/3] Memuat dataset_final\test
Found 90 files belonging to 4 classes.

Kelas ditemukan: ['1', '2', '3', '4']

Index Model     | Nama Folder     | Nilai PPB (Target)  
------------------------------------------------------------
0               | 1               | 1.0                 
1               | 2               | 2.0                 
2               | 3               | 3.0                 
3               | 4               | 4.0                 

Selesai. Dataset siap digunakan.

[3/3] Memuat dataset_final\test
Found 90 files belonging to 4 classes.

Kelas ditemukan: ['1', '2', '3', '4']

Index Model     | Nama Folder     | Nilai PPB (Target)  
------------------------------------------------------------
0               | 1               | 1.0                 
1   

Step 2: Melabeli masing-masing file dengan nilai PPB sebagai ground truth, yang akan menjadi pembanding utama untuk hasil prediksi model.

In [38]:
print("Step 2: Converting labels to PPB values...")


def convert_labels_to_ppb(images, labels):
    """Replace integer class-index labels with their float PPB targets.

    Uses the module-level ``class_to_ppb`` mapping (class index -> PPB value).
    Labels whose index is not a key of that mapping keep their index value
    cast to float.

    Args:
        images: Batch of images; passed through unchanged.
        labels: Integer tensor of class indices.

    Returns:
        ``(images, ppb_labels)`` where ``ppb_labels`` is float32 with one
        extra trailing axis of size 1 (regression target layout).

    Raises:
        TypeError: If ``labels`` is not an integer tensor. This happens when
            the cell is re-run on datasets that were already converted;
            without the guard the float targets would be compared against
            the class indices again and silently shifted.
    """
    if not labels.dtype.is_integer:
        raise TypeError(
            "convert_labels_to_ppb expects integer class-index labels, got "
            f"{labels.dtype.name}. The dataset looks already converted; "
            "reload the datasets before re-running this cell."
        )

    ppb_labels = tf.cast(labels, tf.float32)
    # Always compare against the original integer labels (not ppb_labels) so
    # an already-substituted PPB value can never be matched a second time.
    for class_idx, ppb_val in class_to_ppb.items():
        ppb_labels = tf.where(labels == class_idx, ppb_val, ppb_labels)
    # Add a trailing axis so the targets have the layout used for regression.
    ppb_labels = tf.expand_dims(ppb_labels, axis=-1)
    return images, ppb_labels

# Run the label conversion over all three splits in one pass.
train_ds, val_ds, test_ds = [
    split.map(convert_labels_to_ppb, num_parallel_calls=autotune)
    for split in (train_ds, val_ds, test_ds)
]

Step 2: Converting labels to PPB values...


In [39]:
print("Step 3: Setting up data augmentation...")

# Data augmentation using Keras layers (TensorFlow 2.x compatible).
# Sequential pipeline named "data_augmentation": a random horizontal flip
# followed by a random rotation.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    # Keras interprets the RandomRotation factor as a fraction of 2*pi, not
    # as radians: 0.1 allows rotations of up to +/-36 degrees.
    # NOTE(review): an earlier comment here described this as "0.1 radians
    # (about 5.7 degrees)"; if a ~5.7 degree range was intended, the factor
    # would need to be about 0.016 - confirm the intended range.
    tf.keras.layers.RandomRotation(0.1),  # fraction of a full turn, see note above
    # tf.keras.layers.RandomBrightness(0.1),
    # tf.keras.layers.RandomContrast(0.1)
], name="data_augmentation")

# Preprocessing step: optional Keras augmentation, then ResNet50 input scaling.
def preprocess_with_augmentation(images, labels, training=False):
    """Cast, optionally augment, and ResNet50-preprocess a batch of images.

    Args:
        images: Batch of images; values are kept in the 0-255 range that
            ResNet50's ``preprocess_input`` expects.
        labels: Targets for the batch; returned untouched.
        training: When truthy, ``data_augmentation`` is applied before the
            ResNet50 preprocessing. Defaults to False.

    Returns:
        ``(preprocessed_images, labels)``.
    """
    pixels = tf.cast(images, tf.float32)

    # Augmentation is only meant for the training split.
    augmented = data_augmentation(pixels, training=True) if training else pixels

    # ImageNet normalisation required by ResNet50, applied to the RGB input.
    return preprocess_input(augmented), labels

# Wire the preprocessing into each split; only the training split is augmented.
train_ds = train_ds.map(
    lambda batch, target: preprocess_with_augmentation(batch, target, training=True),
    num_parallel_calls=autotune,
)
val_ds = val_ds.map(
    lambda batch, target: preprocess_with_augmentation(batch, target, training=False),
    num_parallel_calls=autotune,
)
test_ds = test_ds.map(
    lambda batch, target: preprocess_with_augmentation(batch, target, training=False),
    num_parallel_calls=autotune,
)


Step 3: Setting up data augmentation...


Step 4: Mempercepat proses pelatihan model dan mencegah bottleneck, agar model tidak perlu menunggu karena batch data berikutnya sudah siap dan menunggu di memori (RAM atau cache disk).

In [40]:
print("Step 4: Optimizing data pipeline...")

# Training split: deliberately NOT cached at this point. train_ds already
# contains the random augmentation map, and cache() placed after a random
# transformation stores the first epoch's augmented images and replays them
# unchanged in every later epoch, which defeats the augmentation.
# Batch-level shuffling and prefetching are kept.
train_ds = train_ds.shuffle(1000).prefetch(buffer_size=autotune)

# Validation / test splits are mapped with training=False (no random
# augmentation), so caching the preprocessed batches is safe.
val_ds = val_ds.cache().prefetch(buffer_size=autotune)
test_ds = test_ds.cache().prefetch(buffer_size=autotune)

print("Data preprocessing completed!")

Step 4: Optimizing data pipeline...
Data preprocessing completed!
