In [78]:
import tensorflow as tf
import numpy as np
import cv2
import pathlib
import os
import pandas as pd

In [88]:
#path dataset dan parameter
# data_dir = pathlib.Path(r"C:\Users\PC\Documents\innar\data akhir valid merah")
data_dir = pathlib.Path(r"D:\SKRIPSI\kode\data akhir valid merah")
img_height = 224
img_width = 224
batch_size = 32

# Membaca dataset (label dari nama folder)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="grayscale",
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
    seed=42
)

print(train_ds.class_names)

Found 507 files belonging to 10 classes.
['0.5', '1', '1.5', '2', '2.5', '3', '3.5', '4', '5', '6']


In [89]:
#TAHAP PREPROCESSING
# Enhancement CLAHE
def apply_clahe_np(img):
    img = (img * 255).astype(np.uint8)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    enhanced = clahe.apply(img)
    return enhanced.astype(np.float32) / 255.0

# Augmentasi data
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1)
])

In [64]:
#MENJADIKAN DATA TABULAR
#ambil nama file dari dataset
file_paths = list(train_ds.file_paths)

#list untuk menyimpan hasil tabular
data_list = []

#loop per item dataset
for i, (img_batch, label_batch) in enumerate(train_ds):
    img = img_batch[0].numpy().squeeze() / 255.0  # Normalisasi
    clahe_img = apply_clahe_np(img)
    clahe_img = np.expand_dims(clahe_img, axis=-1)
    aug_img = data_augmentation(tf.convert_to_tensor(clahe_img[None, ...]))[0].numpy().squeeze()
    
    # Nama file asli
    file_name = os.path.basename(file_paths[i])
    # Ambil nama folder (label ppb) dari path file
    label_ppb = float(os.path.basename(os.path.dirname(file_paths[i])))

    data_list.append({
        "filename": file_name,
        "label_ppb": label_ppb,
        # "image_array": img  # atau ganti dengan img / clahe_img jika mau simpan hasil lain
    })

#konversi ke dataframe
df = pd.DataFrame(data_list)

#tampilkan ringkasan
print(df.head())

      filename  label_ppb
0  m1,5-46.png        1.5
1    m2-33.png        2.0
2     m4-9.png        4.0
3    m1-44.png        1.0
4    m3-57.png        3.0


In [65]:
model = tf.keras.applications.ResNet50(
    input_shape=(224, 224, 3),  # HARUS RGB
    include_top=False, 
    weights=None
)

In [None]:
# Script pembagian data: 70% train, 20% validasi, 10% test
import shutil
from sklearn.model_selection import train_test_split

def split_data_folder(source_dir, dest_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    source_dir = pathlib.Path(source_dir)
    dest_dir = pathlib.Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)

    for class_folder in source_dir.iterdir():
        if not class_folder.is_dir():
            continue
        files = list(class_folder.glob('*.png'))
        if len(files) == 0:
            continue

        train_files, temp_files = train_test_split(files, train_size=train_ratio, random_state=42)
        val_size = val_ratio / (val_ratio + test_ratio)
        val_files, test_files = train_test_split(temp_files, train_size=val_size, random_state=42)

        for split_name, split_files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
            split_class_dir = dest_dir / split_name / class_folder.name
            split_class_dir.mkdir(parents=True, exist_ok=True)
            for f in split_files:
                shutil.copy(str(f), str(split_class_dir / f.name))

# Contoh penggunaan:
split_data_folder(
    # source_dir=r'C:\Users\PC\Documents\innar\data akhir valid merah',
    # dest_dir=r'C:\Users\PC\Documents\innar\data_split_merah',
    source_dir=r'D:\SKRIPSI\kode\data akhir valid merah',
    dest_dir=r'D:\SKRIPSI\kode\data_split_merah',
    train_ratio=0.7, val_ratio=0.2, test_ratio=0.1
)

In [91]:
data_split_dir = pathlib.Path(r"D:\SKRIPSI\kode\data_split_merah")
img_height = 224
img_width = 224
batch_size = 32
print("Loading datasets...")

Loading datasets...


In [93]:
# Load train dataset
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_split_dir / 'train',
    labels="inferred",
    label_mode="int",
    color_mode="rgb",  # RGB untuk ResNet-50
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
    seed=42
)

# Load validation dataset
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_split_dir / 'val',
    labels="inferred",
    label_mode="int", 
    color_mode="rgb",
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=False,
    seed=42
)

# Load test dataset
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_split_dir / 'test',
    labels="inferred",
    label_mode="int",
    color_mode="rgb", 
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=False,
    seed=42
)

class_names = train_ds.class_names
print(f"Classes found: {class_names}")

# Convert class names to PPB values for regression
class_to_ppb = {i: float(name) for i, name in enumerate(class_names)}
print(f"Class to PPB mapping: {class_to_ppb}")

Found 349 files belonging to 10 classes.
Found 104 files belonging to 10 classes.
Found 54 files belonging to 10 classes.
Classes found: ['0.5', '1', '1.5', '2', '2.5', '3', '3.5', '4', '5', '6']
Class to PPB mapping: {0: 0.5, 1: 1.0, 2: 1.5, 3: 2.0, 4: 2.5, 5: 3.0, 6: 3.5, 7: 4.0, 8: 5.0, 9: 6.0}


In [97]:
print("Step 2: Converting labels to PPB values...")

# Function to convert integer labels to float PPB values
def convert_labels_to_ppb(images, labels):
    ppb_labels = tf.cast(labels, tf.float32)
    for class_idx, ppb_val in class_to_ppb.items():
        ppb_labels = tf.where(labels == class_idx, ppb_val, ppb_labels)
    return images, ppb_labels

# Apply label conversion
train_ds = train_ds.map(convert_labels_to_ppb, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(convert_labels_to_ppb, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(convert_labels_to_ppb, num_parallel_calls=tf.data.AUTOTUNE)

Step 2: Converting labels to PPB values...


In [99]:
print("Step 3: Setting up data augmentation...")

# Data augmentation using Keras layers (TensorFlow 2.x compatible)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),  # 0.1 radians ≈ 5.7 degrees
    tf.keras.layers.RandomBrightness(0.1),
    tf.keras.layers.RandomContrast(0.1),
    tf.keras.layers.RandomZoom(0.1),
], name="data_augmentation")

# Preprocessing function with Keras augmentation
def preprocess_with_augmentation(images, labels, training=False):
    # Normalize images to [0, 1] range
    images = tf.cast(images, tf.float32) / 255.0
    
    if training:
        # Apply augmentation using Keras layers
        images = data_augmentation(images, training=True)
    
    return images, labels

# Apply preprocessing
train_ds = train_ds.map(lambda x, y: preprocess_with_augmentation(x, y, training=True), 
                       num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(lambda x, y: preprocess_with_augmentation(x, y, training=False), 
                   num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(lambda x, y: preprocess_with_augmentation(x, y, training=False), 
                     num_parallel_calls=tf.data.AUTOTUNE)


Step 3: Setting up data augmentation...


In [101]:
print("Step 4: Optimizing data pipeline...")

# Optimize performance with caching and prefetching
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("Data preprocessing completed!")

Step 4: Optimizing data pipeline...
Data preprocessing completed!


In [102]:
print("Step 5: Building ResNet-50 model...")

# Base ResNet-50 model (pre-trained on ImageNet)
base_model = ResNet50(
    weights='imagenet',  # Use pre-trained weights
    include_top=False,   # Exclude top classification layer
    input_shape=(224, 224, 3)
)

# Freeze base model initially for transfer learning
base_model.trainable = False

# Build the complete model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu', name='dense_512'),
    layers.Dropout(0.5, name='dropout_1'),
    layers.Dense(256, activation='relu', name='dense_256'),
    layers.Dropout(0.3, name='dropout_2'),
    layers.Dense(64, activation='relu', name='dense_64'),
    layers.Dense(1, activation='linear', name='aflatoxin_output')  # Regression output
], name='AflatoxinEstimator')

# Compile model for regression
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='mse',  # Mean Squared Error for regression
    metrics=['mae', 'mse']
)

print("Model compiled successfully!")
print(f"Total parameters: {model.count_params():,}")

# Display model architecture
model.summary()

Step 5: Building ResNet-50 model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 0us/step
Model compiled successfully!
Total parameters: 24,784,641


In [103]:
print("Step 6: Setting up training callbacks...")

# Training callbacks for better training control
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1
    ),
    tf.keras.callbacks.ModelCheckpoint(
        'best_aflatoxin_resnet50.keras',
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1
    )
]

Step 6: Setting up training callbacks...


In [104]:
print("Step 7: Phase 1 - Training with frozen base model...")

# Phase 1: Train with frozen base model
initial_epochs = 20

history_1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=initial_epochs,
    callbacks=callbacks,
    verbose=1
)

Step 7: Phase 1 - Training with frozen base model...
Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 2.4107 - mae: 1.2631 - mse: 2.4107
Epoch 1: val_loss improved from None to 0.44821, saving model to best_aflatoxin_resnet50.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2s/step - loss: 1.5402 - mae: 0.9788 - mse: 1.5402 - val_loss: 0.4482 - val_mae: 0.5572 - val_mse: 0.4482 - learning_rate: 1.0000e-04
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.9268 - mae: 0.7636 - mse: 0.9268
Epoch 2: val_loss did not improve from 0.44821
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2s/step - loss: 0.9091 - mae: 0.7629 - mse: 0.9091 - val_loss: 0.5370 - val_mae: 0.5469 - val_mse: 0.5370 - learning_rate: 1.0000e-04
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.6804 - mae: 0.6460 - mse: 0.6804
Epoch 3: val_loss did not imp