In [50]:
import pandas as pd
import re

# Load dataset
data = pd.read_csv('recipes.csv')

# Bersihkan kolom ingredients
def clean_ingredients(ingredients):
    if isinstance(ingredients, str):
        return re.sub(r'[^\w\s]', '', ingredients.lower())  # Hapus karakter khusus dan ubah ke lowercase
    return ''

data['ingredients_cleaned'] = data['RecipeIngredientParts'].apply(clean_ingredients)
data = data[['ingredients_cleaned', 'Name']].dropna()

print(data.head())


                                 ingredients_cleaned  \
0  blueberries granulated sugar vanilla yogurt le...   
1  saffron milk hot green chili peppers onions ga...   
2  sugar lemons rind of lemon zest of fresh water...   
3  extra firm tofu eggplant zucchini mushrooms so...   
4    plain tomato juice cabbage onion carrots celery   

                                Name  
0  Low-Fat Berry Blue Frozen Dessert  
1                            Biryani  
2                      Best Lemonade  
3     Carina's Tofu-Vegetable Kebabs  
4                       Cabbage Soup  


In [51]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF Vectorizer
vectorizer = TfidfVectorizer()
ingredients_tfidf = vectorizer.fit_transform(data['ingredients_cleaned'])

print("TF-IDF Matrix Shape:", ingredients_tfidf.shape)


TF-IDF Matrix Shape: (104171, 2406)


In [56]:
from sklearn.metrics.pairwise import cosine_similarity

# Fungsi untuk merekomendasikan makanan
def recommend_food(user_input, top_n=5):
    # Preprocessing input user
    user_input_cleaned = clean_ingredients(user_input)
    user_tfidf = vectorizer.transform([user_input_cleaned])
    
    # Hitung kemiripan
    similarity_scores = cosine_similarity(user_tfidf, ingredients_tfidf)
    
    # Ambil indeks makanan dengan skor tertinggi
    top_indices = similarity_scores[0].argsort()[-top_n:][::-1]
    
    # Tampilkan hasil rekomendasi
    recommendations = data.iloc[top_indices][['Name', 'ingredients_cleaned']]
    return recommendations

# Contoh input user
user_input = "garlic butter salt"
recommendations = recommend_food(user_input)
print("\nRekomendasi makanan:")
print(recommendations)



Rekomendasi makanan:
                                      Name ingredients_cleaned
7162         Ultimate Instant Garlic Toast  garlic butter salt
12936     Crushed Red Potatoes with Garlic       butter garlic
41294                   Matzoh Board Snack         butter salt
21457         Microwavable Corn On The Cob         salt butter
57200  Kittencal's Perfect Pan-Fried Steak         butter salt


In [57]:
import joblib

# Simpan TF-IDF Vectorizer
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

# Simpan dataset
data.to_csv('processed_recipes.csv', index=False)


In [73]:
# Load TF-IDF Vectorizer dan dataset
vectorizer = joblib.load('tfidf_vectorizer.pkl')
data = pd.read_csv('processed_recipes.csv')

# Gunakan fungsi rekomendasi
user_input = "mustard "
recommendations = recommend_food(user_input)
print("\nRekomendasi makanan:")
print(recommendations)



Rekomendasi makanan:
                                                     Name  \
100438                                    Hot Dog Mummies   
94898             Apricot-Mustard Grilled Pork Tenderloin   
9697                                            Frito Pie   
25973   Frankfurter Wurstchen Im Schlafrock (Hot and C...   
79427                         Pigs in a Blanket Appetizer   

                         ingredients_cleaned  
100438                               mustard  
94898                    mustard salt pepper  
9697                          onions mustard  
25973   sweet mustard mustard ketchup butter  
79427                          mustard honey  
