In [29]:
import pickle
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [30]:
# Load the recipe feature table (TF-IDF weights, judging by the file name -- confirm).
# The path is relative to the notebook's working directory.
df = pd.read_csv('../data/resep_tfidf_features.csv')

In [31]:
# Split the table into recipe titles (first column) and ingredient features (remaining columns).
recipe_titles = df.iloc[:, 0].tolist()
X_features_df = df.iloc[:, 1:]
# The feature column names double as the ingredient vocabulary.
ingredient_list = X_features_df.columns.tolist()

In [32]:
# Convert the recipe data into TensorFlow tensors.
# Boolean presence matrix: True wherever a feature value is strictly positive.
recipe_matrix_binary = tf.constant(X_features_df.values > 0, dtype=tf.bool)
# Raw feature values as float32 (not referenced again in the cells visible here).
recipe_matrix_tfidf = tf.constant(X_features_df.values, dtype=tf.float32)

In [33]:
# Report the dimensions of the loaded recipe/ingredient matrix.
# The first message has no placeholders, so it is a plain string rather than an f-string.
print("Data berhasil dimuat ke dalam TensorFlow Tensor.")
print(f"Jumlah resep: {recipe_matrix_binary.shape[0]}, Jumlah bahan unik: {recipe_matrix_binary.shape[1]}")
print("-" * 40)

Data berhasil dimuat ke dalam TensorFlow Tensor.
Jumlah resep: 172, Jumlah bahan unik: 348
----------------------------------------


In [34]:
class RecipeClassifier(tf.keras.Model):
    """Feed-forward classifier that scores every recipe for an ingredient vector.

    Two ReLU hidden layers (256 and 128 units) feed a softmax layer with one
    unit per recipe.
    """

    def __init__(self, num_recipes):
        """Create the layers; `num_recipes` sets the width of the softmax output."""
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(256, activation='relu')
        self.dense2 = tf.keras.layers.Dense(128, activation='relu')
        self.output_layer = tf.keras.layers.Dense(num_recipes, activation='softmax')

    def call(self, inputs):
        """Pass `inputs` through both hidden layers and return the softmax output."""
        hidden = inputs
        for layer in (self.dense1, self.dense2):
            hidden = layer(hidden)
        return self.output_layer(hidden)

In [35]:
# Initialize model: one output class per recipe title.
model = RecipeClassifier(len(recipe_titles))
optimizer = tf.keras.optimizers.Adam()
# Sparse categorical cross-entropy takes integer class labels directly (no one-hot encoding);
# the classifier's final layer already applies softmax.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

In [36]:
# Training data
# Inputs: the boolean ingredient-presence matrix cast to float32.
X_train = recipe_matrix_binary.numpy().astype(np.float32)
# Labels: the row index of each recipe.
y_train = np.arange(len(recipe_titles))  # Each recipe is its own class

In [37]:
# Train the model
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
# Keep the returned History object: it holds the per-epoch loss/accuracy, and once it is
# assigned its repr (which includes a memory address) is no longer echoed as cell output.
history = model.fit(X_train, y_train, epochs=50, batch_size=32)

Epoch 1/50


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0000e+00 - loss: 5.1521
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0640 - loss: 5.0356
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2056 - loss: 4.9284
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3997 - loss: 4.8185
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5060 - loss: 4.6938
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5381 - loss: 4.5341
Epoch 7/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6485 - loss: 4.3328
Epoch 8/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6335 - loss: 4.1715
Epoch 9/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

<keras.src.callbacks.history.History at 0x297eb1b8940>

In [38]:
def find_recipe_final(ingredients_input, titles, ingredients, matrix_binary, verbose=True):
    """
    Pick the single best recipe for the given ingredients using TensorFlow-based
    filtering and scoring.

    Only recipes that contain every recognised input ingredient are kept. Among
    those, the winner is the recipe whose own ingredient list is covered most
    completely by the input.

    Args:
        ingredients_input: iterable of ingredient phrases; each one is lower-cased
            and split on whitespace into tokens.
        titles: recipe titles, indexed in the same order as the rows of
            `matrix_binary`.
        ingredients: ingredient vocabulary, matched against the tokens.
        matrix_binary: boolean tensor of ingredient presence per recipe
            (assumed to be recipes x ingredients -- confirm against the caller).
        verbose: when True, print progress and return a formatted message; when
            False, print nothing and return only the title (used for evaluation).

    Returns:
        One of the codes "NO_MATCH_INGREDIENTS", "NO_MATCH_RECIPE" or
        "NO_MATCH_SCORE" when nothing qualifies; otherwise the formatted message
        (verbose) or the bare recipe title (non-verbose).
    """
    if verbose:
        print(f"\n--- MENCARI RESEP UNTUK: {ingredients_input} ---")

    # Flatten every phrase into lower-case whitespace-separated tokens.
    tokens = [
        word
        for phrase in ingredients_input
        for word in re.split(r'\s+', str(phrase).lower())
    ]

    known_np = np.isin(ingredients, tokens)
    if not np.any(known_np):
        return "NO_MATCH_INGREDIENTS"  # code consumed by the evaluation path

    if verbose:
        recognised = [tok for tok in tokens if tok in ingredients]
        print(f"Bahan dikenali: {recognised}")

    requested = tf.constant(known_np, dtype=tf.bool)

    # Per recipe, count the requested ingredients that the recipe does not contain.
    absent = tf.logical_and(tf.logical_not(matrix_binary), requested)
    absent_counts = tf.reduce_sum(tf.cast(absent, tf.int32), axis=1)
    eligible = tf.equal(absent_counts, 0)

    if not tf.reduce_any(eligible):
        return "NO_MATCH_RECIPE"  # code consumed by the evaluation path

    # Coverage = shared ingredients / recipe size; the epsilon guards empty recipes.
    shared = tf.reduce_sum(tf.cast(tf.logical_and(matrix_binary, requested), tf.float32), axis=1)
    recipe_sizes = tf.reduce_sum(tf.cast(matrix_binary, tf.float32), axis=1)
    coverage = shared / (recipe_sizes + 1e-6)

    # Recipes that miss a requested ingredient are forced to a score of zero.
    scores = tf.where(eligible, coverage, 0.0)
    winner = tf.argmax(scores)
    top_score = scores[winner]

    if top_score <= 0:
        return "NO_MATCH_SCORE"  # code consumed by the evaluation path

    chosen_title = titles[winner]

    # Evaluation wants only the title; interactive use gets the descriptive message.
    if not verbose:
        return chosen_title
    return f"Resep yang paling sesuai adalah: **{chosen_title}** (Skor Kecocokan: {top_score:.2f})"

In [39]:
def run_evaluation_suite(data_df, titles, ingredients, model):
    """
    Evaluate `model` by querying it with a partial ingredient list for every recipe.

    For each recipe row, the ingredients with positive feature values are ranked
    by value and the top 70% (at least two, when available) are kept. The model
    is then asked to predict the recipe from that reduced set, and the predicted
    title is compared with the true title. Prints overall accuracy and a
    classification report, and draws a confusion matrix when fewer than 30
    distinct labels are involved.

    Args:
        data_df: DataFrame whose first column is the title and whose remaining
            columns are per-ingredient feature values, one row per recipe.
        titles: recipe titles, in the same order as the rows of `data_df` and as
            the model's output units.
        ingredients: ingredient vocabulary defining the model's input order.
        model: callable taking a float32 tensor of shape (1, len(ingredients))
            and returning per-recipe scores with a `.numpy()` method.

    Returns:
        None. All results are printed or plotted.
    """
    print("\n--- MEMULAI EVALUASI KINERJA MODEL (Tanpa find_recipe_final) ---")
    y_true = []
    y_pred = []
    total = len(titles)

    for i, true_title in enumerate(titles):
        recipe_ingredients_series = data_df.iloc[i, 1:]
        important_ingredients = recipe_ingredients_series[recipe_ingredients_series > 0].sort_values(ascending=False)
        num_to_keep = max(2, int(len(important_ingredients) * 0.7))
        test_ingredients = important_ingredients.head(num_to_keep).index.tolist()

        # Tokenise the kept ingredient names and encode them as a presence vector.
        tokenized_input = [
            token
            for item in test_ingredients
            for token in re.split(r'\s+', str(item).lower())
        ]
        input_vector = np.isin(ingredients, tokenized_input).astype(np.float32)

        # Recipes with no recognised ingredient are left out of the metrics.
        if np.sum(input_vector) > 0:
            input_tensor = tf.convert_to_tensor([input_vector], dtype=tf.float32)
            prediction = model(input_tensor).numpy().squeeze()
            predicted_index = np.argmax(prediction)

            y_true.append(true_title)
            y_pred.append(titles[predicted_index])

        # Report progress for every recipe position, including skipped ones, so the
        # final "N/N" message is always printed.
        if (i + 1) % 40 == 0 or (i + 1) == total:
            print(f"Evaluasi selesai untuk {i + 1}/{total} resep...")

    print("\n--- HASIL METRIK EVALUASI ---")
    if not y_true:
        # Nothing was evaluated; computing metrics on empty lists would be an error
        # or meaningless, so stop here.
        print("Tidak ada resep yang dapat dievaluasi (tidak ada bahan yang dikenali).")
        return

    accuracy = accuracy_score(y_true, y_pred)
    print(f"Akurasi Keseluruhan: {accuracy:.2%}\n")
    print("Classification Report:")
    print(classification_report(y_true, y_pred, zero_division=0))

    # Only draw the confusion matrix when it is small enough to be readable.
    unique_labels = sorted(list(set(y_true + y_pred)))
    if len(unique_labels) < 30:
        cm = confusion_matrix(y_true, y_pred, labels=unique_labels)
        plt.figure(figsize=(12, 10))
        sns.heatmap(cm, annot=True, fmt='d', xticklabels=unique_labels, yticklabels=unique_labels, cmap='Blues')
        plt.title('Confusion Matrix')
        plt.ylabel('Label Sebenarnya (True)')
        plt.xlabel('Label Prediksi')
        plt.show()

In [40]:
def save_model_assets(filepath, titles, ingredients, model):
    """
    Pickle the recipe titles, ingredient vocabulary and model weights to `filepath`.

    The parent directory of `filepath` is created if it does not exist yet, so
    saving works on a fresh checkout.

    Args:
        filepath: destination path of the pickle file.
        titles: recipe titles stored under the key "recipe_titles".
        ingredients: ingredient vocabulary stored under "ingredient_list".
        model: object exposing `get_weights()`; its result is stored under
            "model_weights".
    """
    model_assets = {
        "recipe_titles": titles,
        "ingredient_list": ingredients,
        "model_weights": model.get_weights()
    }
    target = Path(filepath)
    # Without this, open() raises FileNotFoundError when the directory is missing.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(model_assets, f)
    print(f"\nAset model berhasil disimpan ke: {filepath}")

# Persist the trained model's weights together with the title and ingredient vocabularies.
save_model_assets("../model/recipe_model.pkl", recipe_titles, ingredient_list, model)


Aset model berhasil disimpan ke: ../model/recipe_model.pkl


In [41]:
# NOTE(review): pickle.load can execute arbitrary code; only load asset files that this
# project wrote itself.
with open("../model/recipe_model.pkl", 'rb') as f:
    loaded_assets = pickle.load(f)

# Rebuild the architecture with one output unit per stored recipe title.
loaded_model = RecipeClassifier(len(loaded_assets["recipe_titles"]))
# Call the model once on a dummy batch sized to the ingredient vocabulary before restoring
# weights -- presumably so the layer variables exist for set_weights; confirm against Keras docs.
_ = loaded_model(tf.zeros((1, len(loaded_assets["ingredient_list"]))))
loaded_model.set_weights(loaded_assets["model_weights"])
# Rebind the notebook-level vocabularies to the values stored in the asset file.
recipe_titles = loaded_assets["recipe_titles"]
ingredient_list = loaded_assets["ingredient_list"]

print(f"Total resep di dalam database: {len(recipe_titles)}")
print("-" * 40)

Total resep di dalam database: 172
----------------------------------------


In [42]:
def find_recipes(ingredients_input, num_recommendations=5, min_score_threshold=0.1):
    """
    Recommend recipes for the given ingredients, ranked by Jaccard similarity.

    Every recipe is scored as |recipe AND input| / |recipe OR input| over the
    ingredient vocabulary. Recipes scoring at least 0.999 are reported as perfect
    matches. Otherwise the top `num_recommendations` recipes whose score reaches
    `min_score_threshold` are listed from best to worst.

    Reads the notebook-level `ingredient_list`, `recipe_titles` and
    `recipe_matrix_binary`.

    Args:
        ingredients_input: iterable of ingredient phrases; each one is lower-cased
            and split on whitespace into tokens.
        num_recommendations: maximum number of recommendations to list. Values
            larger than the number of recipes are clamped, and non-positive values
            yield no recommendations.
        min_score_threshold: minimum Jaccard score for a recipe to be listed.

    Returns:
        A user-facing message string: the perfect matches, the ranked
        recommendations, or an explanation of why nothing was found.
    """
    print(f"\n🍳 Mencari resep untuk: {ingredients_input}...")

    # 1. Tokenise the input and mark which vocabulary entries were requested.
    tokenized_input = []
    for item in ingredients_input:
        tokenized_input.extend(re.split(r'\s+', str(item).lower()))

    input_mask_np = np.isin(ingredient_list, tokenized_input)

    # The mask has a True entry exactly when some token is in the vocabulary, so it
    # replaces a separate per-token list membership scan.
    if not input_mask_np.any():
        return "Tidak ada bahan yang dikenali dalam sistem kami."

    input_mask = tf.constant(input_mask_np, dtype=tf.bool)

    # 2. Score ALL recipes (no pre-filtering) with Jaccard similarity.
    intersection = tf.reduce_sum(tf.cast(tf.logical_and(recipe_matrix_binary, input_mask), tf.float32), axis=1)
    union = tf.reduce_sum(tf.cast(tf.logical_or(recipe_matrix_binary, input_mask), tf.float32), axis=1)

    # The epsilon avoids division by zero.
    final_scores = intersection / (union + 1e-6)

    # 3. Perfect matches (score >= 0.999, allowing for the epsilon) take precedence.
    perfect_match_indices = tf.where(final_scores >= 0.999).numpy().flatten()

    if len(perfect_match_indices) > 0:
        response = "**Resep Relevan Ditemukan!**\nBahan Anda cocok sempurna dengan resep berikut:\n"
        for idx in perfect_match_indices:
            response += f"- **{recipe_titles[idx]}**\n"
        return response

    # 4. Otherwise fall back to the best-scoring recommendations.
    # tf.nn.top_k raises when k exceeds the number of scores, so clamp k to [0, n].
    k = max(0, min(int(num_recommendations), int(final_scores.shape[0])))
    top_k_scores, top_k_indices = tf.nn.top_k(final_scores, k=k)

    # Convert to numpy once instead of on every loop iteration.
    top_scores = top_k_scores.numpy()
    top_indices = top_k_indices.numpy()

    # Keep only results at or above the minimum score threshold.
    recommendations = [
        (recipe_titles[index], score)
        for score, index in zip(top_scores, top_indices)
        if score >= min_score_threshold
    ]

    if not recommendations:
        return f"Tidak ada resep yang cocok (Skor tertinggi di bawah ambang batas {min_score_threshold})."

    response = "**Berikut Rekomendasi Resep yang Mengandung Bahan Anda:**\n(Diurutkan dari yang paling cocok)\n"
    for i, (title, score) in enumerate(recommendations):
        response += f"{i+1}. **{title}** (Skor Kecocokan: {score:.2f})\n"
    return response

In [43]:
# Example query with multi-word ingredient phrases; the recorded output below shows a
# perfect match for this input.
bahan = ['mie instan goreng', 'telur', 'kaldu jamur']
hasil = find_recipes(bahan)
print(hasil)


🍳 Mencari resep untuk: ['mie instan goreng', 'telur', 'kaldu jamur']...
**Resep Relevan Ditemukan!**
Bahan Anda cocok sempurna dengan resep berikut:
- **Omelet Mie (instant goreng)**



In [44]:
# Example query with a short ingredient list; the recorded output below shows ranked
# recommendations rather than a perfect match.
bahan = ['mie', 'ayam']
hasil = find_recipes(bahan)
print(hasil)


🍳 Mencari resep untuk: ['mie', 'ayam']...
**Berikut Rekomendasi Resep yang Mengandung Bahan Anda:**
(Diurutkan dari yang paling cocok)
1. **Bubur Ayam Sat Set** (Skor Kecocokan: 0.33)
2. **Pizza mie instan** (Skor Kecocokan: 0.20)
3. **Mie goreng instan** (Skor Kecocokan: 0.20)
4. **Mie Kriuk (Mie Goreng Instan)** (Skor Kecocokan: 0.20)
5. **Omelet Mie (instant goreng)** (Skor Kecocokan: 0.17)

