In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv("RAW_recipes.csv",sep=",",encoding="utf-8")

In [4]:
df.head()

Unnamed: 0,id,title,description,instructions,ingredients
0,82,Grilled Chicken Breast,Simply grilled chicken with lemon and herbs.,1. Prepare ingredients. 2. Cook accordingly. 3...,"olive oil, chicken breast, lemon juice, thyme"
1,60,Kuru Fasulye,Classic Turkish white bean stew with tomato sa...,1. Soak beans overnight. 2. Sauté onions and t...,"white beans, onion, tomato paste, olive oil"
2,61,Tavuk Sote,Chicken sautéed with mixed vegetables.,1. Prepare ingredients. 2. Cook accordingly. 3...,"onion, garlic, olive oil, chicken breast"
3,62,Tavuk Şiş,Marinated chicken skewers grilled to perfection.,1. Prepare ingredients. 2. Cook accordingly. 3...,"olive oil, red pepper flakes, salt, chicken br..."
4,63,Adana Kebap,Spicy minced meat kebab.,1. Prepare ingredients. 2. Cook accordingly. 3...,"ground beef, onion, red pepper flakes, salt"


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            29 non-null     int64 
 1   title         29 non-null     object
 2   description   29 non-null     object
 3   instructions  29 non-null     object
 4   ingredients   29 non-null     object
dtypes: int64(1), object(4)
memory usage: 1.3+ KB


In [6]:
df.drop(['contributor_id','submitted','n_steps','nutrition'],axis=1,inplace=True)

KeyError: "['contributor_id', 'submitted', 'n_steps', 'nutrition'] not found in axis"

In [6]:
df.head()

Unnamed: 0,name,id,minutes,tags,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,"['60-minutes-or-less', 'time-to-make', 'course...","['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,"['30-minutes-or-less', 'time-to-make', 'course...","['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,"['time-to-make', 'course', 'preparation', 'mai...","['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,"['60-minutes-or-less', 'time-to-make', 'course...",['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,"['weeknight', 'time-to-make', 'course', 'main-...",['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [7]:
df.isnull().sum()

name                1
id                  0
minutes             0
tags                0
steps               0
description      4979
ingredients         0
n_ingredients       0
dtype: int64

In [8]:
df.dropna(subset=['name','description'],inplace=True)

In [9]:
df.isnull().sum()

name             0
id               0
minutes          0
tags             0
steps            0
description      0
ingredients      0
n_ingredients    0
dtype: int64

In [10]:
df['ingredients_cleaned'] = df['ingredients'].apply(lambda x:', '.join(eval(x)))

In [11]:
df['ingredients'].head()

0    ['winter squash', 'mexican seasoning', 'mixed ...
1    ['prepared pizza crust', 'sausage patty', 'egg...
2    ['ground beef', 'yellow onions', 'diced tomato...
3    ['spreadable cheese with garlic and herbs', 'n...
4    ['tomato juice', 'apple cider vinegar', 'sugar...
Name: ingredients, dtype: object

In [12]:
df['ingredients_cleaned'].head()

0    winter squash, mexican seasoning, mixed spice,...
1    prepared pizza crust, sausage patty, eggs, mil...
2    ground beef, yellow onions, diced tomatoes, to...
3    spreadable cheese with garlic and herbs, new p...
4    tomato juice, apple cider vinegar, sugar, salt...
Name: ingredients_cleaned, dtype: object

In [13]:
df.head()

Unnamed: 0,name,id,minutes,tags,steps,description,ingredients,n_ingredients,ingredients_cleaned
0,arriba baked winter squash mexican style,137739,55,"['60-minutes-or-less', 'time-to-make', 'course...","['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,"winter squash, mexican seasoning, mixed spice,..."
1,a bit different breakfast pizza,31490,30,"['30-minutes-or-less', 'time-to-make', 'course...","['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,"prepared pizza crust, sausage patty, eggs, mil..."
2,all in the kitchen chili,112140,130,"['time-to-make', 'course', 'preparation', 'mai...","['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,"ground beef, yellow onions, diced tomatoes, to..."
3,alouette potatoes,59389,45,"['60-minutes-or-less', 'time-to-make', 'course...",['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,"spreadable cheese with garlic and herbs, new p..."
4,amish tomato ketchup for canning,44061,190,"['weeknight', 'time-to-make', 'course', 'main-...",['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,"tomato juice, apple cider vinegar, sugar, salt..."


In [14]:
#1. Başlık TF-IDF
title_vectorizer = TfidfVectorizer()
title_matrix = title_vectorizer.fit_transform(df['name'])

#2. Malzeme TF-IDF
ingredient_vectorizer = TfidfVectorizer()
ingredient_matrix = ingredient_vectorizer.fit_transform(df['ingredients_cleaned'])

In [15]:
def recommendation(query_title, user_ingredients, df, 
                          title_vectorizer, title_matrix,
                          ingredient_vectorizer, ingredient_matrix,
                          top_n=5,
                          alpha=0.5):
    # Başlık benzerliği
    title_vec = title_vectorizer.transform([query_title])
    title_scores = cosine_similarity(title_vec, title_matrix).flatten()

    # Malzeme benzerliği
    all_features = set(ingredient_vectorizer.get_feature_names_out())
    common_ing = set(user_ingredients).intersection(all_features)
    if not common_ing:
        return "Hiçbir malzeme eşleşmedi."

    user_ing_text = " ".join(common_ing)
    ing_vec = ingredient_vectorizer.transform([user_ing_text])
    ing_scores = cosine_similarity(ing_vec, ingredient_matrix).flatten()

    # Skorları birleştir
    combined_scores = alpha * title_scores + (1 - alpha) * ing_scores

    # En iyi skorları sırala
    top_indices = combined_scores.argsort()[-top_n:][::-1]

    results = []
    for idx in top_indices:
        results.append({
            "title": df.iloc[idx]['name'],
            "ingredients": df.iloc[idx]['ingredients_cleaned'],
            "similarity_score": round(combined_scores[idx], 3)
        })

    return results

In [18]:
recommendation(
    query_title="chocolate cake",
    user_ingredients=["milk", "egg", "banana","sugar","butter","chocolate","water"],
    df=df,
    title_vectorizer=title_vectorizer,
    title_matrix=title_matrix,
    ingredient_vectorizer=ingredient_vectorizer,
    ingredient_matrix=ingredient_matrix,
    alpha=0.6
)

[{'title': 'chocolate chocolate cake',
  'ingredients': 'chocolate cake mix, water, vegetable oil, eggs, chocolate pudding mix, milk, cool whip, walnuts',
  'similarity_score': 0.682},
 {'title': 'chocolate cake',
  'ingredients': 'sugar, flour, salt, baking soda, cocoa, miracle whip, water, egg, vanilla',
  'similarity_score': 0.675},
 {'title': 'chocolate  cake',
  'ingredients': 'flour, sugar, baking soda, salt, cocoa, sour cream, brewed coffee, egg, canola oil',
  'similarity_score': 0.646},
 {'title': 'chocolate chocolate chocolate bundt cake',
  'ingredients': 'chocolate cake mix, instant chocolate pudding mix, cocoa, milk, eggs, milk chocolate chips, powdered sugar',
  'similarity_score': 0.613},
 {'title': 'chocolate chocolate cake  bundt cake',
  'ingredients': 'semi-sweet chocolate chips, butter, granulated sugar, eggs, vanilla, all-purpose flour, baking soda, salt, milk',
  'similarity_score': 0.61}]

In [17]:
#df.to_csv("recipes_cleaned.csv",index=False)