In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the recipe dataset from a comma-separated, UTF-8 encoded CSV file.
df = pd.read_csv(
    "recipes.csv",
    sep=",",
    encoding="utf-8",
)

In [3]:
# Preview the first rows of the loaded frame to check the columns that were read.
df.head()

Unnamed: 0,title,ingredients
0,Banana Pancakes,"banana, flour, milk, egg"
1,Chocolate Cake,"flour, sugar, cocoa powder, egg, milk, butter"
2,Vanilla Muffins,"flour, sugar, vanilla, egg, milk, butter"
3,Omelette,"egg, milk, salt, pepper"
4,French Toast,"bread, egg, milk, cinnamon, sugar"


In [4]:
# Normalise the ingredient text: lowercase it, then remove every character that
# is neither a word character nor whitespace (e.g. the commas between items).
# The pattern is a raw string so that \w and \s reach the regex engine as-is;
# in a plain string literal they are invalid escape sequences, which newer
# Python versions warn about.
df["ingredients"] = df["ingredients"].str.lower()
df["ingredients"] = df["ingredients"].str.replace(r'[^\w\s]', '', regex=True)

In [5]:
# Inspect the last rows to confirm the ingredient text was lowercased and stripped of punctuation.
df.tail()

Unnamed: 0,title,ingredients
45,Mushroom Risotto,rice mushroom onion parmesan
46,Zucchini Fritters,zucchini egg flour cheese
47,Cheesecake,cream cheese sugar egg biscuit
48,Baked Potato,potato cheese butter sour cream
49,Shrimp Pasta,pasta shrimp garlic cream


In [6]:
# Learn a TF-IDF vocabulary from the ingredient strings and vectorise every
# recipe in the same pass; one matrix row per recipe.
ingredient_texts = df['ingredients']
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(ingredient_texts)

matrix_shape = tfidf_matrix.shape
print("TF-IDF Matrix Shape:", matrix_shape)

TF-IDF Matrix Shape: (50, 69)


In [7]:
# Print the sparse matrix; the output lists each stored entry as (row, column) followed by its weight.
print(tfidf_matrix)

  (0, 24)	0.41455924614199613
  (0, 34)	0.4293007930024125
  (0, 26)	0.531992873753793
  (0, 2)	0.6006871422303466
  (1, 12)	0.29909741763690273
  (1, 49)	0.43338551659573527
  (1, 19)	0.518111873779411
  (1, 60)	0.3486591594120596
  (1, 24)	0.29909741763690273
  (1, 34)	0.30973319198992166
  (1, 26)	0.38382377482057844
  (2, 65)	0.5749078035667051
  (2, 12)	0.3318847687696796
  (2, 60)	0.38687951709893115
  (2, 24)	0.3318847687696796
  (2, 34)	0.34368644709818774
  (2, 26)	0.4258989120035666
  (3, 46)	0.5657660208236224
  (3, 53)	0.5657660208236224
  (3, 24)	0.4166833412605853
  (3, 34)	0.4315004200214913
  (4, 18)	0.5520304569934306
  (4, 8)	0.4441091539400065
  (4, 60)	0.4441091539400065
  (4, 24)	0.3809792386247897
  :	:
  (44, 62)	0.402381813835243
  (44, 4)	0.49101670167914163
  (44, 46)	0.5463476340970082
  (45, 35)	0.6036785793196606
  (45, 51)	0.4731791575059791
  (45, 42)	0.5459316652310388
  (45, 40)	0.3370938662607094
  (46, 68)	0.6641875145565367
  (46, 14)	0.4119936812765

In [8]:
# Show the vocabulary terms learned by the fitted vectorizer.
tfidf.get_feature_names_out()

array(['apple', 'avocado', 'banana', 'bean', 'beef', 'bell', 'berry',
       'biscuit', 'bread', 'breadcrumb', 'broccoli', 'bulgur', 'butter',
       'carrot', 'cheese', 'chicken', 'chickpeas', 'chili', 'cinnamon',
       'cocoa', 'cream', 'croutons', 'cucumber', 'curry', 'egg', 'feta',
       'flour', 'garlic', 'granola', 'grapes', 'honey', 'lemon',
       'lettuce', 'mayo', 'milk', 'mushroom', 'mustard', 'oats', 'oil',
       'olive', 'onion', 'orange', 'parmesan', 'parsley', 'pasta',
       'peanut', 'pepper', 'pita', 'potato', 'powder', 'pumpkin', 'rice',
       'salmon', 'salt', 'sauce', 'shrimp', 'sour', 'soy', 'spaghetti',
       'strawberry', 'sugar', 'tahini', 'tomato', 'tortilla', 'tuna',
       'vanilla', 'water', 'yogurt', 'zucchini'], dtype=object)

In [9]:
# Cosine similarity of the TF-IDF matrix against itself: entry [i, j] is the
# similarity between recipe i and recipe j.
cosine_sim = cosine_similarity(tfidf_matrix)

sim_shape = cosine_sim.shape
print("Cosine similarity matrix shape:", sim_shape)

Cosine similarity matrix shape: (50, 50)


In [10]:
# Spot-check: similarity of recipe 0 to each of the first five recipes (itself included).
first_recipe_row = cosine_sim[0]
for other in range(5):
    print(f"Tarif 0 ile Tarif {other} arasındaki benzerlik: {first_recipe_row[other]:.3f}")

Tarif 0 ile Tarif 0 arasındaki benzerlik: 1.000
Tarif 0 ile Tarif 1 arasındaki benzerlik: 0.461
Tarif 0 ile Tarif 2 arasındaki benzerlik: 0.512
Tarif 0 ile Tarif 3 arasındaki benzerlik: 0.358
Tarif 0 ile Tarif 4 arasındaki benzerlik: 0.327


In [11]:
selected_recipe_index = 0

# Pair every other recipe's row position with its similarity to the selected
# recipe. The selected recipe is excluded explicitly by position rather than by
# dropping the first element after sorting: the top-ranked entry is not
# guaranteed to be the recipe itself (a recipe with identical ingredients ties
# at the same score, and a recipe with an all-zero vector scores 0 against itself).
similarity_scores = [
    (idx, score)
    for idx, score in enumerate(cosine_sim[selected_recipe_index])
    if idx != selected_recipe_index
]

# Highest similarity first; ties keep their original (row) order.
similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

In [12]:
# Keep the five best-ranked neighbours of the selected recipe.
top_similar_recipes = similarity_scores[:5]

selected_title = df.iloc[selected_recipe_index]['title']
print(f"Seçilen tarif: {selected_title}\n")
print("Bu tarife benzer diğer tarifler ve içerdiği malzemeler ile benzerlik skorları:\n")

# One short report per neighbour: title, ingredient text, similarity to two decimals.
for position, similarity in top_similar_recipes:
    match = df.iloc[position]
    print(f"- {match['title']}")
    print(f"  Malzemeler: {match['ingredients']}")
    print(f"  Benzerlik skoru: {similarity:.2f}\n")

Seçilen tarif: Banana Pancakes

Bu tarife benzer diğer tarifler ve içerdiği malzemeler ile benzerlik skorları:

- Vanilla Muffins
  Malzemeler: flour sugar vanilla egg milk butter
  Benzerlik skoru: 0.51

- Chocolate Cake
  Malzemeler: flour sugar cocoa powder egg milk butter
  Benzerlik skoru: 0.46

- Fruit Smoothie
  Malzemeler: banana strawberry milk yogurt
  Benzerlik skoru: 0.45

- Zucchini Fritters
  Malzemeler: zucchini egg flour cheese
  Benzerlik skoru: 0.42

- Omelette
  Malzemeler: egg milk salt pepper
  Benzerlik skoru: 0.36



In [13]:
def recommend_recipes(recipe_title, user_ingredients, df, vectorizer, tfidf_matrix, top_n=5):
    """Recommend recipes whose title contains ``recipe_title``, ranked by ingredient similarity.

    Parameters
    ----------
    recipe_title : str
        Text to look for in the ``title`` column. Matched case-insensitively as a
        literal substring (not as a regular expression).
    user_ingredients : list of str or str
        Ingredients the user has. A list is joined with spaces; a single string
        is used as-is.
    df : pandas.DataFrame
        Recipe table with ``title`` and ``ingredients`` columns. Its rows must be
        in the same order as the rows of ``tfidf_matrix``.
    vectorizer
        Fitted vectorizer exposing ``transform``; used to embed the user's ingredients.
    tfidf_matrix
        Matrix with one row per row of ``df``, produced by the same vectorizer.
    top_n : int, default 5
        Maximum number of recommendations to return. Values <= 0 yield an empty list.

    Returns
    -------
    str or list of dict
        A message string when no title contains ``recipe_title``. Otherwise a list
        of dicts with keys ``title``, ``ingredients`` and ``similarity_score``,
        ordered from most to least similar.
    """
    # regex=False: the title is user text, so characters such as "(" or "[" must
    # not be interpreted as a pattern (they would raise re.error).
    # na=False: rows with a missing title count as non-matching instead of
    # producing an NA mask that cannot be used for filtering.
    title_mask = df['title'].str.contains(recipe_title, case=False, regex=False, na=False)

    if not title_mask.any():
        return f"'{recipe_title}' içeren tarif bulunamadı."

    # A slice like [-0:] would select everything, so handle "no results wanted" explicitly.
    if top_n <= 0:
        return []

    # Use row positions, not index labels: both tfidf_matrix[...] and df.iloc[...]
    # are positional, so labels would be wrong for any non-default index.
    matched_positions = np.flatnonzero(title_mask.to_numpy())

    # Joining a bare string would split it into single characters; wrap it instead.
    if isinstance(user_ingredients, str):
        user_ingredients = [user_ingredients]
    user_ing_text = " ".join(user_ingredients)
    user_vec = vectorizer.transform([user_ing_text])

    sim_scores = cosine_similarity(user_vec, tfidf_matrix[matched_positions]).flatten()

    # Descending by score; the stable sort keeps table order among equal scores.
    ranked = np.argsort(-sim_scores, kind="stable")[:top_n]

    recommendations = []
    for rank_pos in ranked:
        row = df.iloc[matched_positions[rank_pos]]
        recommendations.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "similarity_score": sim_scores[rank_pos]
        })

    return recommendations


In [14]:
user_title = "Pancake"
user_ingredients = ["banana", "egg", "flour"]

recommendations = recommend_recipes(user_title, user_ingredients, df, tfidf, tfidf_matrix)

# recommend_recipes returns a plain message string (not a list) when no title
# matches. Looping over that string would yield single characters and fail on
# rec['title'], so the message is printed directly in that case.
if isinstance(recommendations, str):
    print(recommendations)
else:
    print("Önerilen Tarifler:")
    for rec in recommendations:
        print(f"Tarif: {rec['title']}")
        print(f"Malzemeler: {rec['ingredients']}")
        print(f"Benzerlik skoru: {rec['similarity_score']:.2f}\n")

Önerilen Tarifler:
Tarif: Banana Pancakes
Malzemeler: banana flour milk egg
Benzerlik skoru: 0.90



In [None]:
import pickle

# Persist the fitted TF-IDF vectorizer first, then the TF-IDF matrix, each to
# its own pickle file in the working directory.
for path, artifact in (
    ('vectorizer.pkl', tfidf),
    ('tfidf_matrix.pkl', tfidf_matrix),
):
    with open(path, 'wb') as f:
        pickle.dump(artifact, f)