In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# there can be read by the cells below.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import pandas as pd
# Location of the merged recipe CSV on the mounted Google Drive.
merged_csv_path = "/content/drive/MyDrive/Colab Notebooks/Capstone Bangkit/merged_data.csv"

# Read the CSV into a DataFrame and print its column / dtype / non-null summary.
data = pd.read_csv(filepath_or_buffer=merged_csv_path)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106214 entries, 0 to 106213
Data columns (total 19 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    106214 non-null  int64  
 1   Name                        106214 non-null  object 
 2   Images                      106214 non-null  object 
 3   RecipeCategory              106214 non-null  object 
 4   Keywords                    106214 non-null  object 
 5   RecipeIngredientQuantities  106214 non-null  object 
 6   RecipeIngredientParts       106214 non-null  object 
 7   Calories                    106214 non-null  float64
 8   FatContent                  106214 non-null  float64
 9   SaturatedFatContent         106214 non-null  float64
 10  CholesterolContent          106214 non-null  float64
 11  SodiumContent               106214 non-null  float64
 12  CarbohydrateContent         106214 non-null  float64
 13  FiberContent  

In [None]:
import re

# Clean the ingredients column
def clean_ingredients(ingredients):
    """Normalize a comma-separated ingredient string into space-separated text.

    Each comma-separated item is lower-cased, stripped of every character that
    is neither a word character nor whitespace, and has its internal whitespace
    collapsed to single spaces. Items that end up empty (e.g. from ",," or a
    trailing comma) are dropped so the result never contains stray or doubled
    spaces.

    Parameters
    ----------
    ingredients : str or any
        Comma-separated ingredient list. Non-string values (e.g. NaN/None)
        are treated as "no ingredients".

    Returns
    -------
    str
        The cleaned ingredients joined by single spaces, or '' when the input
        is not a string or contains no usable text.
    """
    if not isinstance(ingredients, str):
        return ''

    cleaned_ingredients = []
    for ingredient in ingredients.lower().split(','):
        # Remove punctuation/special characters, then collapse the whitespace
        # runs that removal can leave behind ("salt & pepper" -> "salt pepper").
        ingredient = ' '.join(re.sub(r'[^\w\s]', '', ingredient).split())
        if ingredient:  # skip items that were empty or punctuation-only
            cleaned_ingredients.append(ingredient)

    # Join the cleaned items back into one string
    return ' '.join(cleaned_ingredients)




In [None]:
# Add the cleaned ingredient text, then keep only the two columns the
# recommender needs.
data['ingredients_cleaned'] = data['RecipeIngredientParts'].apply(clean_ingredients)

# reset_index(drop=True) keeps the row labels equal to the row positions after
# dropna(), so the labels shown in results match the positional (iloc) lookups
# used against the TF-IDF matrix and the frame later reloaded from CSV.
data = data[['ingredients_cleaned', 'Name']].dropna().reset_index(drop=True)

data.head()

Unnamed: 0,ingredients_cleaned,Name
0,blueberries granulated sugar vanilla yogurt le...,Low-Fat Berry Blue Frozen Dessert
1,saffron milk hot green chili peppers onions ga...,Biryani
2,sugar lemons rind of lemon zest of fresh water...,Best Lemonade
3,extra firm tofu eggplant zucchini mushrooms so...,Carina's Tofu-Vegetable Kebabs
4,plain tomato juice cabbage onion carrots celery,Cabbage Soup


## Convert food ingredients into numerical representations

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the cleaned ingredient text and encode every recipe
# as one row of the resulting matrix.
ingredient_corpus = data['ingredients_cleaned']
vectorizer = TfidfVectorizer()
ingredients_tfidf = vectorizer.fit_transform(ingredient_corpus)

# Report the matrix dimensions.
print("TF-IDF Matrix Shape:", ingredients_tfidf.shape)


TF-IDF Matrix Shape: (106214, 2427)


## Cosine Similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Recommend recipes based on the ingredients the user has
def recommend_food(user_input, top_n=5):
    """Return the recipes whose ingredients are most similar to ``user_input``.

    The input is cleaned with ``clean_ingredients``, encoded with the fitted
    global ``vectorizer`` and compared (cosine similarity) against every row of
    the global ``ingredients_tfidf`` matrix. Row positions of that matrix are
    used to look up recipes in the global ``data`` frame via ``iloc``.

    Parameters
    ----------
    user_input : str
        Comma-separated ingredients. Non-string input is cleaned to an empty
        string and therefore matches nothing.
    top_n : int, default 5
        Maximum number of recipes to return. Values <= 0 return an empty frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name`` and ``ingredients_cleaned``, ordered from most to
        least similar. Recipes with zero similarity are never returned, so the
        frame may hold fewer than ``top_n`` rows (or none at all).
    """
    columns = ['Name', 'ingredients_cleaned']

    # A slice of [-0:] would select *every* row, so handle this case explicitly.
    if top_n <= 0:
        return data.iloc[[]][columns]

    # Preprocess the user input the same way the recipes were preprocessed
    user_input_cleaned = clean_ingredients(user_input)
    user_tfidf = vectorizer.transform([user_input_cleaned])

    # Similarity of the query against every recipe (single query -> row 0)
    similarity_scores = cosine_similarity(user_tfidf, ingredients_tfidf)[0]

    # Positions of the highest-scoring recipes, best first
    top_indices = similarity_scores.argsort()[-top_n:][::-1]

    # Drop recipes that share nothing with the query; without this, an empty or
    # unknown input would return arbitrary zero-score rows as "recommendations".
    top_indices = top_indices[similarity_scores[top_indices] > 0]

    return data.iloc[top_indices][columns]


In [None]:
# Example user input: a comma-separated list of available ingredients.
user_input = "penne pasta, mozzarella cheese, zucchini, parmesan cheese"
# Look up the most similar recipes (recommend_food defaults to the top 5).
recommendations = recommend_food(user_input)
print("\nRekomendasi makanan:")
# Bare last expression so the notebook renders the DataFrame as a table.
recommendations


Rekomendasi makanan:


Unnamed: 0,Name,ingredients_cleaned
7304,Vegetable Pasta Bake,penne pasta mozzarella cheese zucchini parmesa...
86746,OAMC Baked Ziti,penne pasta onion green pepper mozzarella chee...
60940,Pizza Pasta,penne pasta tomatoes mozzarella cheese fresh b...
32475,Pizza Pasta Salad,penne pasta tomatoes mozzarella cheese dried b...
13976,Creamy Baked Ziti With Broccoli,penne pasta olive oil garlic parmesan cheese h...


In [None]:
import joblib

# Persist the fitted TF-IDF vectorizer to disk.
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')

# Write the processed recipe table as CSV, leaving out the index column.
data.to_csv(path_or_buf='processed_recipes.csv', index=False)


In [None]:
# Load the persisted TF-IDF vectorizer and the processed dataset
vectorizer = joblib.load('tfidf_vectorizer.pkl')
# Empty ingredient strings are written as empty CSV fields and read back as
# NaN; restore them to '' so the vectorizer can process every row.
data = pd.read_csv('processed_recipes.csv').fillna({'ingredients_cleaned': ''})

# Rebuild the recipe TF-IDF matrix from the reloaded artifacts. Only the
# vectorizer and the CSV were saved, so without this step recommend_food would
# depend on an `ingredients_tfidf` left over in kernel memory and fail on a
# fresh kernel.
ingredients_tfidf = vectorizer.transform(data['ingredients_cleaned'])

# Use the recommendation function
# NOTE(review): "chees " looks like a typo of "cheese" - confirm whether it is
# an intentional check of how the recommender copes with a misspelled input.
user_input = "penne pasta, mozzarella cheese, zucchini, parmesan chees "
recommendations = recommend_food(user_input)
print("\nRekomendasi makanan:")
recommendations


Rekomendasi makanan:


Unnamed: 0,Name,ingredients_cleaned
7304,Vegetable Pasta Bake,penne pasta mozzarella cheese zucchini parmesa...
86746,OAMC Baked Ziti,penne pasta onion green pepper mozzarella chee...
60940,Pizza Pasta,penne pasta tomatoes mozzarella cheese fresh b...
32475,Pizza Pasta Salad,penne pasta tomatoes mozzarella cheese dried b...
13976,Creamy Baked Ziti With Broccoli,penne pasta olive oil garlic parmesan cheese h...
