In [2]:
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50

%run preprocess.ipynb

Step 1: Splitting data...
Loading datasets...
Found 657 files belonging to 4 classes.
Found 216 files belonging to 4 classes.
Found 114 files belonging to 4 classes.

Classes found: ['1', '2', '3', '4']
Class to PPB mapping: {0: 1.0, 1: 2.0, 2: 3.0, 3: 4.0}
Step 2: Converting labels to PPB values...
Step 3: Setting up data augmentation...
Step 4: Optimizing data pipeline...
Data preprocessing completed!


In [None]:
print("Step 5: Building ResNet-50 model...")

# ImageNet-pretrained ResNet-50 without its classification head, used as a
# feature extractor for 224x224 RGB inputs.
# NOTE(review): Keras ResNet50 weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input; the preprocessing notebook
# is not visible here -- confirm the input pipeline applies it.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Phase 1 trains only the new regression head, so the backbone starts frozen.
base_model.trainable = False

# Functional-API graph: backbone -> global average pooling -> dense head.
inputs = tf.keras.Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)  # backbone is run in inference mode
x = layers.GlobalAveragePooling2D()(x)

# Dense head specification: (units, dense name, dropout rate, dropout name).
# A dropout rate of None means the dense layer is not followed by dropout.
for units, dense_name, drop_rate, drop_name in (
    (512, 'dense_512', 0.5, 'dropout_1'),
    (256, 'dense_256', 0.3, 'dropout_2'),
    (64, 'dense_64', None, None),
):
    x = layers.Dense(units, activation='relu', name=dense_name)(x)
    if drop_rate is not None:
        x = layers.Dropout(drop_rate, name=drop_name)(x)

# Single linear unit: the model regresses one continuous value per image.
outputs = layers.Dense(1, activation='linear', name='aflatoxin_output')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs, name='AflatoxinEstimator')

# Regression setup: optimise MSE, report MAE and MSE alongside it.
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['mae', 'mse'],
)

print("Model compiled successfully!")
print(f"Total parameters: {model.count_params():,}")

# Layer-by-layer overview of the assembled network.
model.summary()

Step 5: Building ResNet-50 model...
Model compiled successfully!
Total parameters: 24,784,641


In [7]:
print("Step 6: Setting up training callbacks...")

# Every callback below watches the validation loss.

# Stop after 10 epochs without improvement and roll back to the best weights.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-7.
shrink_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

# Write the full model (not just weights) whenever validation loss improves.
save_best = tf.keras.callbacks.ModelCheckpoint(
    'best_aflatoxin_resnet50.keras',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

callbacks = [stop_early, shrink_lr, save_best]

Step 6: Setting up training callbacks...


In [8]:
print("Step 7: Phase 1 - Training with frozen base model...")

# Epoch budget for the frozen-backbone phase; later cells read this name.
initial_epochs = 20

# Keyword arguments for the phase-1 fit, gathered in one place.
phase1_fit_args = dict(
    validation_data=val_ds,
    epochs=initial_epochs,
    callbacks=callbacks,
    verbose=1,
)

# Only the dense head receives gradient updates while the backbone is frozen.
history_1 = model.fit(train_ds, **phase1_fit_args)

Step 7: Phase 1 - Training with frozen base model...
Epoch 1/20
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691ms/step - loss: 4.9452 - mae: 1.8732 - mse: 4.9452
Epoch 1: val_loss improved from inf to 1.33454, saving model to best_aflatoxin_resnet50.keras
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1s/step - loss: 4.8621 - mae: 1.8538 - mse: 4.8621 - val_loss: 1.3345 - val_mae: 1.0171 - val_mse: 1.3345 - learning_rate: 1.0000e-04
Epoch 2/20
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 699ms/step - loss: 1.5384 - mae: 1.0313 - mse: 1.5384
Epoch 2: val_loss did not improve from 1.33454
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 937ms/step - loss: 1.5411 - mae: 1.0327 - mse: 1.5411 - val_loss: 1.6093 - val_mae: 1.0807 - val_mse: 1.6093 - learning_rate: 1.0000e-04
Epoch 3/20
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 671ms/step - loss: 1.5478 - mae: 1.0359 - mse: 1.5478
Epoch 3: val_loss 

In [None]:
print("Step 8: Phase 2 - Fine-tuning...")

# Phase 2: Fine-tuning
# Make the backbone trainable, then re-freeze everything below `fine_tune_at`
# so only the top layers are updated.
# The backbone is still invoked with training=False in the model graph (see the
# model-building cell), which per the Keras fine-tuning guide keeps its
# BatchNormalization layers in inference mode while their weights are unfrozen.
base_model.trainable = True

# Index of the first backbone layer that is allowed to train.
fine_tune_at = 140

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

print(f"Unfreezing {len(base_model.layers) - fine_tune_at} layers for fine-tuning")

# Changing `trainable` only takes effect after recompiling; use a 10x smaller
# learning rate so the pretrained weights are adjusted gently.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),  # Lower learning rate
    loss='mse',
    metrics=['mae', 'mse']
)

# Continue the epoch count from where phase 1 actually stopped.
# EarlyStopping may end phase 1 before `initial_epochs`, so derive the offset
# from the recorded history instead of assuming the full budget was used;
# otherwise the epoch numbering would skip ahead.
fine_tune_epochs = 30
completed_epochs = len(history_1.epoch)
total_epochs = completed_epochs + fine_tune_epochs

history_2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=total_epochs,
    initial_epoch=completed_epochs,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Progress marker for the cell that merges the two training logs.
print("Step 9: Combining training histories...")

# Helper that concatenates the per-epoch metric lists of both training phases
def combine_histories(hist1, hist2):
    """Concatenate the per-epoch metric lists of two training runs.

    Args:
        hist1: object whose ``history`` attribute maps metric names to lists
            (first run).
        hist2: object with the same kind of ``history`` attribute (second run).

    Returns:
        A new dict with one entry per metric of ``hist1``; each value is the
        ``hist1`` list followed by the ``hist2`` list. Metrics present only in
        ``hist2`` are not included.

    Raises:
        KeyError: if a metric of ``hist1`` is missing from ``hist2``.
    """
    first_run = hist1.history
    second_run = hist2.history
    # List concatenation builds new lists, so neither input is modified.
    return {metric: first_run[metric] + second_run[metric] for metric in first_run}

# Merged metrics dict: phase-1 epochs first, followed by the fine-tuning epochs.
history_combined = combine_histories(history_1, history_2)

In [None]:
print("Step 10: Plotting training history...")


def plot_training_history(history, phase_boundary=None):
    """Plot loss, MAE and MSE curves for both training phases on a 2x2 grid.

    Args:
        history: dict mapping metric names to per-epoch lists. Must contain
            'loss', 'val_loss', 'mae', 'val_mae', 'mse' and 'val_mse'.
        phase_boundary: x position (0-based epoch index) of the first
            fine-tuning epoch, i.e. the number of epochs phase 1 actually ran.
            Defaults to the notebook-level ``initial_epochs`` when omitted.
    """
    if phase_boundary is None:
        phase_boundary = initial_epochs
    epochs_run = len(history['loss'])

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # (axes, metric key, training label, validation label, title, y-axis label)
    panels = [
        (axes[0, 0], 'loss', 'Training Loss', 'Validation Loss',
         'Model Loss (MSE)', 'Loss (MSE)'),
        (axes[0, 1], 'mae', 'Training MAE', 'Validation MAE',
         'Mean Absolute Error', 'MAE (PPB)'),
        (axes[1, 0], 'mse', 'Training MSE', 'Validation MSE',
         'Mean Squared Error', 'MSE'),
    ]

    for panel_index, (ax, key, train_label, val_label, title, ylabel) in enumerate(panels):
        ax.plot(history[key], label=train_label, color='blue')
        ax.plot(history[f'val_{key}'], label=val_label, color='red')

        # Draw the phase separator BEFORE building the legend; a labelled
        # artist added after legend() is never listed in it.
        separator_style = dict(color='green', linestyle='--', alpha=0.7)
        if panel_index == 0:
            # Only the loss panel carries the legend entry for the separator.
            separator_style['label'] = 'Fine-tuning starts'
        ax.axvline(x=phase_boundary, **separator_style)

        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Text-only summary panel. Epoch counts come from the recorded history so
    # they stay correct when early stopping shortens either phase.
    summary_ax = axes[1, 1]
    summary_lines = [
        (0.7, 'Training Complete!', dict(fontsize=16, fontweight='bold')),
        (0.5, f'Phase 1: {phase_boundary} epochs (frozen base)', dict(fontsize=12)),
        (0.3, f'Phase 2: {epochs_run - phase_boundary} epochs (fine-tuning)',
         dict(fontsize=12)),
    ]
    for y_pos, text, font_style in summary_lines:
        summary_ax.text(0.5, y_pos, text,
                        horizontalalignment='center', verticalalignment='center',
                        transform=summary_ax.transAxes, **font_style)
    summary_ax.axis('off')

    plt.tight_layout()
    plt.show()


# Place the separator where phase 1 really ended (it may have stopped early).
plot_training_history(history_combined, phase_boundary=len(history_1.history['loss']))

In [None]:
print("Step 11: Evaluating model on test set...")

# Evaluate model on test set.
# Predictions and labels are gathered from the same batch in the same loop
# iteration, so the two arrays stay aligned regardless of how test_ds orders
# its batches (its construction is not visible in this notebook).
batch_truths = []
batch_preds = []

print("Making predictions on test set...")
for images, labels in test_ds:
    # predict_on_batch runs one forward pass on the given batch and returns a
    # NumPy array; the Keras docs advise against calling model.predict()
    # repeatedly inside a loop over small batches.
    batch_preds.append(np.ravel(model.predict_on_batch(images)))
    # Flatten labels too, so (batch,) and (batch, 1) label shapes both line up
    # with the flattened predictions.
    batch_truths.append(np.ravel(labels.numpy()))

if not batch_preds:
    raise ValueError("test_ds yielded no batches; cannot evaluate the model")

y_true = np.concatenate(batch_truths)
y_pred = np.concatenate(batch_preds)

# Calculate regression metrics (names are reused by later cells).
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)

print("="*50)
print("FINAL TEST RESULTS")
print("="*50)
print(f"Mean Absolute Error (MAE): {mae:.3f} PPB")
print(f"Mean Squared Error (MSE): {mse:.3f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.3f} PPB")
print(f"R² Score: {r2:.3f}")

# Classification into risk categories
def classify_risk(ppb_value):
    """Map a PPB value to its risk-category label.

    Args:
        ppb_value: numeric value compared against the band limits.

    Returns:
        "Rendah (0-75 PPB)" for values up to and including 75,
        "Sedang (76-150 PPB)" for values up to and including 150,
        "Tinggi (151-200 PPB)" for everything else.
    """
    # Upper limit (inclusive) of each band, checked in ascending order.
    bands = (
        (75, "Rendah (0-75 PPB)"),
        (150, "Sedang (76-150 PPB)"),
    )
    for upper_limit, label in bands:
        if ppb_value <= upper_limit:
            return label
    # Anything that matched no band above falls through to the top category.
    return "Tinggi (151-200 PPB)"

# Apply risk classification to the ground truth and to the predictions.
# NOTE(review): the preprocessing log at the top of this notebook reports a
# class-to-PPB mapping of {0: 1.0, 1: 2.0, 2: 3.0, 3: 4.0}. If labels really
# are 1-4 while classify_risk splits at 75 and 150, every sample lands in the
# lowest category and the accuracy below is trivially high -- confirm the
# label scale against the thresholds.
y_true_class = [classify_risk(val) for val in y_true]
y_pred_class = [classify_risk(val) for val in y_pred]

# Fraction of samples whose predicted category equals the actual category.
correct = sum(1 for true, pred in zip(y_true_class, y_pred_class) if true == pred)
# Avoid ZeroDivisionError on an empty test set; NaN marks "undefined".
accuracy = correct / len(y_true_class) if y_true_class else float("nan")

print(f"Risk Classification Accuracy: {accuracy:.3f} ({accuracy*100:.1f}%)")

# Show classification distribution (Counter is imported in the top import cell).
true_dist = Counter(y_true_class)
pred_dist = Counter(y_pred_class)

print("\nRisk Category Distribution:")
print("Actual:", dict(true_dist))
print("Predicted:", dict(pred_dist))

# Surface the degenerate case instead of letting it pass as a good score.
if len(true_dist) == 1:
    print("\nWarning: all actual values fall into a single risk category; "
          "the classification accuracy is not informative. "
          "Check that the risk thresholds match the label scale.")

In [None]:
print("Step 12: Plotting prediction results...")


def plot_predictions(y_true, y_pred):
    """Scatter predicted against actual values with risk-zone overlays.

    The metrics shown in the text box are computed from the arrays passed in,
    so the figure always describes the data it displays.

    Args:
        y_true: 1-D array-like of actual values.
        y_pred: 1-D array-like of predicted values, same length as ``y_true``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Metrics for the annotation box, derived from the arguments rather than
    # from notebook-level variables that may be stale.
    mae_val = mean_absolute_error(y_true, y_pred)
    rmse_val = np.sqrt(mean_squared_error(y_true, y_pred))
    r2_val = r2_score(y_true, y_pred)
    matches = sum(
        1 for actual, predicted in zip(y_true, y_pred)
        if classify_risk(actual) == classify_risk(predicted)
    )
    acc_val = matches / len(y_true)

    fig, ax = plt.subplots(figsize=(12, 8))

    # Scatter plot
    ax.scatter(y_true, y_pred, alpha=0.6, s=50, color='blue',
               edgecolors='black', linewidth=0.5)

    # Perfect prediction line (y = x) across the full data range
    min_val = min(y_true.min(), y_pred.min())
    max_val = max(y_true.max(), y_pred.max())
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2,
            label='Perfect Prediction')

    # Risk category boundaries on both axes (same limits as classify_risk)
    ax.axhline(y=75, color='orange', linestyle='--', alpha=0.7, linewidth=2,
               label='Rendah-Sedang (75 PPB)')
    ax.axhline(y=150, color='red', linestyle='--', alpha=0.7, linewidth=2,
               label='Sedang-Tinggi (150 PPB)')
    ax.axvline(x=75, color='orange', linestyle='--', alpha=0.7, linewidth=2)
    ax.axvline(x=150, color='red', linestyle='--', alpha=0.7, linewidth=2)

    # Background colors for risk zones
    ax.axhspan(0, 75, alpha=0.1, color='green', label='Risk Zone: Rendah')
    ax.axhspan(75, 150, alpha=0.1, color='orange', label='Risk Zone: Sedang')
    ax.axhspan(150, 200, alpha=0.1, color='red', label='Risk Zone: Tinggi')

    ax.set_xlabel('Actual Aflatoxin Content (PPB)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Predicted Aflatoxin Content (PPB)', fontsize=12, fontweight='bold')
    ax.set_title('Predicted vs Actual Aflatoxin Content\nResNet-50 Model Performance',
                 fontsize=14, fontweight='bold')
    # The metrics box occupies the top-left corner, so the legend goes to the
    # opposite corner to keep the two from overlapping.
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)

    # Add metrics text box
    textstr = (f'MAE: {mae_val:.2f} PPB\nRMSE: {rmse_val:.2f} PPB\n'
               f'R²: {r2_val:.3f}\nClassification Acc: {acc_val:.1%}')
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
    ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=11,
            verticalalignment='top', bbox=props)

    fig.tight_layout()
    plt.show()


plot_predictions(y_true, y_pred)


In [None]:
print("Step 13: Saving models...")

# Persist the complete model (architecture, weights and compile state).
model.save('aflatoxin_resnet50_final.keras')
print("✅ Final model saved as 'aflatoxin_resnet50_final.keras'")

# Persist the weights on their own as well.
model.save_weights('aflatoxin_resnet50_weights.weights.h5')
print("✅ Model weights saved as 'aflatoxin_resnet50_weights.weights.h5'")

# Closing report, framed by a 60-character rule and emitted in one print call.
rule = "=" * 60
closing_report = [
    rule,
    "TRAINING COMPLETED SUCCESSFULLY!",
    rule,
    "Best model saved with validation loss monitoring",
    "Final model performance:",
    f"  - MAE: {mae:.3f} PPB",
    f"  - RMSE: {rmse:.3f} PPB",
    f"  - R² Score: {r2:.3f}",
    f"  - Risk Classification Accuracy: {accuracy:.1%}",
    rule,
]
print("\n".join(closing_report))