In [None]:
import re

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [None]:
# Read the recipe spreadsheet and discard the province_id column.
# errors='ignore' keeps this cell working when that column is absent.
df = pd.read_excel("/content/data_recipe.xlsx").drop(
    columns=['province_id'],
    errors='ignore',
)

In [None]:
# Clean the ingredient text
def clean_text(text):
    """Normalise an ingredient string into lowercase, space-separated words.

    Every run of characters outside ``a-z`` (digits, punctuation, whitespace,
    non-ASCII characters) is treated as a word separator, so tokens that are
    joined only by punctuation stay separate: ``"nasi,telur"`` becomes
    ``"nasi telur"`` instead of being fused into ``"nasitelur"``.

    Args:
        text: Raw ingredient text. Any non-string value (e.g. NaN from a
            missing cell) is accepted.

    Returns:
        The cleaned string, or ``''`` when ``text`` is not a string or
        contains no ASCII letters.
    """
    if not isinstance(text, str):
        return ''
    # Extracting letter runs and re-joining them lowercases, strips, collapses
    # whitespace and splits on punctuation/digits in a single pass.
    return ' '.join(re.findall(r'[a-z]+', text.lower()))

# Run every raw ingredient entry through clean_text and store the result
# in a new column next to the original text.
df['cleaned_bahan'] = df['bahan_bahan'].map(clean_text)

In [None]:
# TF-IDF vectorisation: learn the vocabulary from the cleaned ingredient
# strings and keep the resulting document-term matrix in X.
ingredient_corpus = df['cleaned_bahan']
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(ingredient_corpus)

In [None]:
# Use the recipe id (food_id) as the class label: map the ids to integer
# codes with LabelEncoder, then one-hot encode those codes.
recipe_ids = df['food_id']
le = LabelEncoder()
y = le.fit_transform(recipe_ids)
y_cat = to_categorical(y)

In [None]:
# Hold out 20% of the rows for validation; random_state pins the shuffle.
# The TF-IDF matrix is converted to a dense array before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X.toarray(),
    y_cat,
    test_size=0.2,
    random_state=42,
)

In [7]:
# TensorFlow model architecture: a feed-forward classifier that takes one
# TF-IDF row as input and outputs a softmax over the one-hot label columns.
#
# The input shape is declared with an explicit Input layer instead of passing
# input_shape= to the first Dense layer, which Keras warns against for
# Sequential models.
#
# NOTE(review): the labels are encoded food_id values. If every recipe has its
# own id, the held-out rows contain only classes never seen during training,
# which would explain the flat val_accuracy in the recorded run -- confirm
# whether this classifier is needed at all, since the recommender below uses
# cosine similarity only.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(y_cat.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep the History object so the loss/accuracy curves can be inspected later;
# binding it also stops the cell from echoing the History repr.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 184ms/step - accuracy: 1.5206e-04 - loss: 9.7376 - val_accuracy: 0.0018 - val_loss: 9.8869
Epoch 2/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 159ms/step - accuracy: 0.0010 - loss: 9.6732 - val_accuracy: 0.0018 - val_loss: 10.1812
Epoch 3/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 159ms/step - accuracy: 0.0017 - loss: 9.5292 - val_accuracy: 0.0018 - val_loss: 10.5713
Epoch 4/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 152ms/step - accuracy: 0.0020 - loss: 9.0293 - val_accuracy: 0.0018 - val_loss: 11.3660
Epoch 5/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 153ms/step - accuracy: 0.0114 - loss: 7.9553 - val_accuracy: 0.0018 - val_loss: 12.4656
Epoch 6/20
[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 156ms/step - accuracy: 0.0544 - loss: 6.5304 - val_accuracy: 0.0018 - val_loss: 13.9002
E

<keras.src.callbacks.history.History at 0x7a21b6338610>

In [8]:
def recommend_recipe_ids(user_input, top_k=5):
    """Recommend recipes whose ingredients are most similar to ``user_input``.

    The input is cleaned with ``clean_text``, vectorised with the fitted
    ``vectorizer`` and compared against every row of the TF-IDF matrix ``X``
    using cosine similarity. Row positions in ``X`` are mapped back to recipe
    ids through ``df['food_id']``.

    Args:
        user_input: Free-text ingredient list, e.g. ``"nasi, telur"``.
        top_k: Maximum number of recommendations to return.

    Returns:
        ``"Input bahan tidak valid."`` when ``user_input`` is empty or not a
        string. Otherwise a list of at most ``top_k`` dicts
        ``{"food_id": int, "Similarity": float}`` ordered from most to least
        similar. Recipes with zero similarity are omitted, so the list can be
        shorter than ``top_k`` or empty.
    """
    if not user_input or not isinstance(user_input, str):
        return "Input bahan tidak valid."

    # A non-positive top_k must yield nothing. Without this guard the slice
    # [-0:] would select every recipe, and a negative top_k would select all
    # but the first few.
    if top_k <= 0:
        return []

    cleaned_input = clean_text(user_input)
    user_vec = vectorizer.transform([cleaned_input])

    # Cosine similarity between the query and every recipe
    similarities = cosine_similarity(user_vec, X).flatten()
    top_indices = similarities.argsort()[::-1][:top_k]

    # Pull the id column once instead of building a full row Series per hit
    food_ids = df['food_id'].to_numpy()

    # Recipe id plus similarity. A score of 0 means no shared vocabulary, so
    # such rows are not recommendations. float() keeps the scores as plain
    # Python floats rather than np.float64.
    return [
        {
            "food_id": int(food_ids[idx]),
            "Similarity": round(float(similarities[idx]), 4),
        }
        for idx in top_indices
        if similarities[idx] > 0
    ]

In [9]:
# --- 7. Usage example ---
user_input = "nasi, telur"
rekomendasi = recommend_recipe_ids(user_input)

# Header first, then one recommendation per line.
print("Rekomendasi ID Resep:", *rekomendasi, sep="\n")

Rekomendasi ID Resep:
{'food_id': 16698, 'Similarity': np.float64(0.6351)}
{'food_id': 9440, 'Similarity': np.float64(0.6011)}
{'food_id': 5205, 'Similarity': np.float64(0.5728)}
{'food_id': 11686, 'Similarity': np.float64(0.5264)}
{'food_id': 2380, 'Similarity': np.float64(0.5114)}


In [10]:
import joblib

# 1. Persist the trained Keras (TensorFlow) model
model.save("resep_model.h5")

# 2-3. Persist the fitted TF-IDF vectorizer and the label encoder with joblib
for artifact, filename in (
    (vectorizer, "vectorizer.pkl"),
    (le, "label_encoder.pkl"),
):
    joblib.dump(artifact, filename)

print("✅ Model dan semua komponen berhasil disimpan.")



✅ Model dan semua komponen berhasil disimpan.
