# Food Classifier: Training, Evaluation, and Prediction

Notebook ini menggabungkan proses training dan prediksi menjadi satu.

Fitur:
- Train 3 model (EfficientNetB0, MobileNetV2, ResNet50V2) dan pilih model dengan akurasi validasi terbaik.
- Dataset di `data/dataset_gambar/` dengan subfolder `train/`, `valid/`, `test/`.
- Simpan model terbaik ke `models/best_model.keras` dan class names ke `models/class_names.json`.
- Prediksi gambar dan integrasi nutrisi dari `data/Nutrition.csv` (per 100g) dengan opsi kustomisasi.
- Dapat digunakan oleh Streamlit app di `app/streamlit_app.py`.

In [None]:
# Path configuration
import os
from pathlib import Path

# Project root. Defaults to the original author's checkout; set the
# FOOD_PREDICTOR_DIR environment variable to run the notebook on another
# machine without editing this cell. An empty value falls back to the default.
BASE_DIR = Path(os.environ.get('FOOD_PREDICTOR_DIR') or 'c:/Users/LQO/food_predictor')
DATA_DIR = BASE_DIR / 'data'
# Image dataset root; the loading cell reads its train/, valid/ and test/ sub-folders.
IMG_DIR = DATA_DIR / 'dataset_gambar'
# Output folder for checkpoints and class_names.json; created up front so
# later cells can write into it.
MODELS_DIR = BASE_DIR / 'models'
MODELS_DIR.mkdir(parents=True, exist_ok=True)
NUTRITION_CSV = DATA_DIR / 'Nutrition.csv'
print('DATA_DIR:', DATA_DIR)
print('IMG_DIR:', IMG_DIR)
print('MODELS_DIR:', MODELS_DIR)
print('NUTRITION_CSV:', NUTRITION_CSV)

In [None]:
# Imports
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0, MobileNetV2, ResNet50V2
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficient
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as preprocess_mobilenet
from tensorflow.keras.applications.resnet_v2 import preprocess_input as preprocess_resnet
import matplotlib.pyplot as plt
import os, shutil
# Prefetch buffer setting handed to tf.data in the dataset pipeline.
AUTOTUNE = tf.data.AUTOTUNE
# Seed passed to the shuffled train/valid dataset loaders.
SEED = 42
# Spatial size images are resized to; also the spatial part of the model input shape.
IMG_SIZE = (224, 224)
# Batch size used by all three dataset loaders.
BATCH_SIZE = 32
# Maximum number of epochs per architecture.
EPOCHS = 10
# EarlyStopping patience (epochs without val_accuracy improvement).
PATIENCE = 3
print(tf.__version__)

In [None]:
# Load the train/valid/test splits from their sub-folders of IMG_DIR
train_ds = tf.keras.utils.image_dataset_from_directory(
    IMG_DIR / 'train',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    IMG_DIR / 'valid',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    IMG_DIR / 'test',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)
class_names = train_ds.class_names
print('Classes:', class_names)

# Each split derives its integer labels from its own class folders, so a
# missing or extra folder in valid/ or test/ would silently shift the label
# indices relative to the training split. Fail early instead.
for split_name, split_ds in (('valid', val_ds), ('test', test_ds)):
    if split_ds.class_names != class_names:
        raise ValueError(
            f"Class folders of '{split_name}' do not match 'train': "
            f"{split_ds.class_names} != {class_names}"
        )


# Cache/prefetch
def prepare(ds, shuffle=False, shuffle_buffer=64):
    """Cache a batched dataset in memory and prefetch it.

    Args:
        ds: A batched ``tf.data.Dataset``.
        shuffle: When True, re-shuffle the cached batches on every epoch.
            ``cache()`` replays elements in the order of the first pass, so
            without this the shuffle done by the loader is frozen after
            epoch one.
        shuffle_buffer: Shuffle buffer size, counted in batches.

    Returns:
        The cached (optionally shuffled) and prefetched dataset.
    """
    ds = ds.cache()
    if shuffle:
        ds = ds.shuffle(shuffle_buffer, seed=SEED, reshuffle_each_iteration=True)
    return ds.prefetch(AUTOTUNE)


train_ds_prep = prepare(train_ds, shuffle=True)
val_ds_prep = prepare(val_ds)
test_ds_prep = prepare(test_ds)
# Save class names for inference
with open(MODELS_DIR / 'class_names.json', 'w', encoding='utf-8') as f:
    json.dump(class_names, f, ensure_ascii=False, indent=2)
(MODELS_DIR / 'class_names.json').as_posix()

In [None]:
# Random augmentation pipeline applied to the model input inside build_model
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip('horizontal'),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ],
    name='data_augmentation',
)
# Size of the softmax output layer
num_classes = len(class_names)

In [None]:
# Helper that builds a transfer-learning classifier
def build_model(arch: str):
    """Build and compile a classifier on top of a frozen ImageNet backbone.

    The graph is: input -> shared ``data_augmentation`` -> architecture-specific
    ``preprocess_input`` (as a Lambda layer) -> frozen backbone -> global
    average pooling -> dropout -> softmax over ``num_classes``. Because the
    preprocessing is part of the model, callers feed it un-preprocessed images.

    Args:
        arch: One of 'efficientnetb0', 'mobilenetv2' or 'resnet50v2'.

    Returns:
        The compiled ``keras.Model`` named ``'<arch>_clf'``.

    Raises:
        ValueError: If ``arch`` is not one of the supported names.
    """
    backbones = {
        'efficientnetb0': (EfficientNetB0, preprocess_efficient),
        'mobilenetv2': (MobileNetV2, preprocess_mobilenet),
        'resnet50v2': (ResNet50V2, preprocess_resnet),
    }
    input_shape = (*IMG_SIZE, 3)

    inputs = keras.Input(shape=input_shape)
    augmented = data_augmentation(inputs)

    if arch not in backbones:
        raise ValueError('Unknown arch')
    backbone_cls, preprocess = backbones[arch]
    base = backbone_cls(include_top=False, weights='imagenet', input_shape=input_shape)

    # Preprocess before the backbone; the backbone stays frozen and runs in
    # inference mode.
    preprocessed = layers.Lambda(preprocess, name=arch + '_preprocess')(augmented)
    base.trainable = False
    features = base(preprocessed, training=False)

    pooled = layers.GlobalAveragePooling2D()(features)
    dropped = layers.Dropout(0.2)(pooled)
    outputs = layers.Dense(num_classes, activation='softmax')(dropped)

    model = keras.Model(inputs, outputs, name=f'{arch}_clf')
    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Early stopping on validation accuracy, shared by every architecture
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=PATIENCE,
        restore_best_weights=True,
    )
]
archs = ['efficientnetb0', 'mobilenetv2', 'resnet50v2']
histories = {}       # arch -> Keras history dict
val_accuracies = {}  # arch -> validation accuracy of the best checkpoint
saved_models = {}    # arch -> checkpoint path (posix string)

for arch in archs:
    print(f'\nTraining {arch} ...')
    model = build_model(arch)

    # Keep only the epoch with the best validation accuracy on disk
    ckpt_path = MODELS_DIR / f'{arch}.keras'
    ckpt_cb = keras.callbacks.ModelCheckpoint(
        ckpt_path.as_posix(),
        monitor='val_accuracy',
        save_best_only=True,
    )

    hist = model.fit(
        train_ds_prep,
        validation_data=val_ds_prep,
        epochs=EPOCHS,
        callbacks=callbacks + [ckpt_cb],
    )
    histories[arch] = hist.history

    # Score the reloaded checkpoint rather than the in-memory model
    best_model = keras.models.load_model(ckpt_path.as_posix())
    _, val_acc = best_model.evaluate(val_ds_prep, verbose=0)
    val_accuracies[arch] = float(val_acc)
    saved_models[arch] = ckpt_path.as_posix()

val_accuracies

In [None]:
# Pick the architecture with the highest validation accuracy and copy its
# checkpoint to best_model.keras
best_arch, best_val_acc = max(val_accuracies.items(), key=lambda item: item[1])
print('Best arch:', best_arch, 'val_acc=', best_val_acc)

best_model_path = MODELS_DIR / f'{best_arch}.keras'
best_model = keras.models.load_model(best_model_path.as_posix())

final_path = MODELS_DIR / 'best_model.keras'
shutil.copy(best_model_path, final_path)
final_path.as_posix()

In [None]:
# Score the selected model on the held-out test split
test_loss, test_acc = best_model.evaluate(test_ds_prep, verbose=1)
test_report = {'test_loss': float(test_loss), 'test_acc': float(test_acc)}
print(test_report)

In [None]:
# Load the nutrition CSV and index it by food name
nutri_df = pd.read_csv(NUTRITION_CSV)
# The food_name column must match class_names for prediction lookups to hit
nutri_map = {
    record['food_name']: {
        nutrient: float(record[nutrient])
        for nutrient in ('calories', 'protein', 'fat', 'carbs')
    }
    for record in nutri_df.to_dict('records')
}
list(nutri_map.items())[:3]

In [None]:
# Util prediksi dari image path atau array
from PIL import Image
def load_image_for_model(img_path_or_bytes):
    """Load an image as a float32 RGB array resized to ``IMG_SIZE``.

    Args:
        img_path_or_bytes: A filesystem path (``str`` or ``Path``), an open
            binary file-like object, or the raw encoded image as
            ``bytes``/``bytearray``.

    Returns:
        A ``float32`` NumPy array holding the resized RGB pixels. No model
        preprocessing is applied here.
    """
    import io

    source = img_path_or_bytes
    if isinstance(source, (bytes, bytearray)):
        # Raw encoded image data: PIL needs a file-like object, not a bytes blob.
        source = io.BytesIO(source)

    # PIL's resize takes (width, height); IMG_SIZE is used elsewhere in the
    # (rows, cols) order of the model input shape, so swap it here.
    target_wh = (IMG_SIZE[1], IMG_SIZE[0])

    # The context manager releases the file handle PIL opened for a path.
    with Image.open(source) as img:
        rgb = img.convert('RGB').resize(target_wh)
    return np.array(rgb, dtype='float32')

def predict_image(img_path_or_bytes, grams: float = 100.0, custom_per100g: dict | None = None):
    """Classify a food image and scale its nutrition facts to a serving size.

    The saved model already contains its architecture-specific preprocessing
    layer (added by ``build_model``), so the image is passed in exactly as
    ``load_image_for_model`` returns it. Applying ``preprocess_input`` again
    here would preprocess the pixels twice.

    NOTE: the model and class names are re-read from ``MODELS_DIR`` on every
    call.

    Args:
        img_path_or_bytes: Image source accepted by ``load_image_for_model``.
        grams: Serving weight in grams used to scale the per-100 g values.
        custom_per100g: Optional overrides for the per-100 g values; only the
            keys 'calories', 'protein', 'fat' and 'carbs' are honoured.

    Returns:
        A dict with 'label', 'confidence', 'per_100g', 'grams' and
        'per_serving'. Labels missing from ``nutri_map`` get all-zero
        nutrition values.

    Raises:
        ValueError: If ``grams`` is negative, or if the number of stored
            class names differs from the model's output size.
    """
    if grams < 0:
        raise ValueError('grams must be non-negative')

    class_path = MODELS_DIR / 'class_names.json'
    with open(class_path, 'r', encoding='utf-8') as f:
        cls = json.load(f)
    model = keras.models.load_model((MODELS_DIR / 'best_model.keras').as_posix())

    arr = load_image_for_model(img_path_or_bytes)
    x = np.expand_dims(arr, 0)
    probs = model.predict(x, verbose=0)[0]
    if len(cls) != len(probs):
        raise ValueError(
            f'class_names.json has {len(cls)} entries but the model outputs '
            f'{len(probs)} classes'
        )

    idx = int(np.argmax(probs))
    label = cls[idx]
    conf = float(probs[idx])

    # Copy so the returned dict never aliases the shared nutri_map entry.
    per100 = dict(nutri_map.get(label, {'calories': 0, 'protein': 0, 'fat': 0, 'carbs': 0}))
    if custom_per100g is not None:
        per100.update({k: float(v) for k, v in custom_per100g.items() if k in per100})

    factor = grams / 100.0
    per_serving = {k: round(v * factor, 2) for k, v in per100.items()}
    return {
        'label': label,
        'confidence': round(conf, 4),
        'per_100g': per100,
        'grams': grams,
        'per_serving': per_serving
    }

# Contoh pakai: 
# predict_image('path_ke_gambar.jpg', grams=150, custom_per100g={'calories': 120})