In [171]:
import pandas as pd
import ast
import re 


In [172]:
data = pd.read_csv("./../data/recipe-dataset.csv")
print(data.head(5))

   Unnamed: 0                                              Title  \
0           0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1           1                    Crispy Salt and Pepper Potatoes   
2           2                        Thanksgiving Mac and Cheese   
3           3                 Italian Sausage and Bread Stuffing   
4           4                                       Newton's Law   

                                         Ingredients  \
0  ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1  ['2 large egg whites', '1 pound new potatoes (...   
2  ['1 cup evaporated milk', '1 cup whole milk', ...   
3  ['1 (¾- to 1-pound) round Italian loaf, cut in...   
4  ['1 teaspoon dark brown sugar', '1 teaspoon ho...   

                                        Instructions  \
0  Pat chicken dry with paper towels, season all ...   
1  Preheat oven to 400°F and line a rimmed baking...   
2  Place a rack in middle of oven; preheat to 400...   
3  Preheat oven to 350°F with 

In [173]:
data.dropna(inplace=True)
data.drop_duplicates(inplace=True)
data.drop(columns=["Unnamed: 0"], inplace=True)

In [174]:
def clean_ingredient_list(ingredients):
    cleaned = []
    for ing in ingredients:
        ing = re.sub(r'[^a-zA-Z ]', '', ing)
        ing = ing.lower().strip()
        words_to_remove = ["cup", "tsp", "tbsp", "pound", "pounds", "ounce", "ounces", "oz", "lb", "teaspoon", "teaspoons", "tablespoon", "tablespoons", "whole", "qt", "to", "g", "cups"]
        ing = " ".join([word for word in ing.split() if word not in words_to_remove])
        if ing:
            cleaned.append(ing)
    return list(set(cleaned))

data["Cleaned_Ingredients"] = data["Cleaned_Ingredients"].apply(lambda x: clean_ingredient_list(ast.literal_eval(x)) if isinstance(x, str) else clean_ingredient_list(x))
data["Cleaned_Ingredients"].head(15)

0     [chicken, kosher salt, freshly ground black pe...
1     [kosher salt, new potatoes about inch in diame...
2     [elbow macaroni, garlic powder, onion powder, ...
3     [heavy cream divided, olive oil divided, large...
4     [apple butter storebought or homemade, hot wat...
5     [chamomile tea bags, fresh lemon juice, reposa...
6     [amaro averna, fresh lemon juice taste dependi...
7     [turmeric syrup, amontillado sherry, granulate...
8     [assorted dals such as chana dal moong dal mas...
9     [cumin, aleppo pepper or crushed red pepper fl...
10    [cream cheese roughly diced at room temperatur...
11    [smooth dijon mustard, unsweetened apple juice...
12    [medium granny smith apples about total, reduc...
13    [chicken or chicken legs thigh and drumstick a...
14    [garlic cloves, lowsodium chicken broth, unsal...
Name: Cleaned_Ingredients, dtype: object

In [175]:
print(data.head(5))

                                               Title  \
0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1                    Crispy Salt and Pepper Potatoes   
2                        Thanksgiving Mac and Cheese   
3                 Italian Sausage and Bread Stuffing   
4                                       Newton's Law   

                                         Ingredients  \
0  ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1  ['2 large egg whites', '1 pound new potatoes (...   
2  ['1 cup evaporated milk', '1 cup whole milk', ...   
3  ['1 (¾- to 1-pound) round Italian loaf, cut in...   
4  ['1 teaspoon dark brown sugar', '1 teaspoon ho...   

                                        Instructions  \
0  Pat chicken dry with paper towels, season all ...   
1  Preheat oven to 400°F and line a rimmed baking...   
2  Place a rack in middle of oven; preheat to 400...   
3  Preheat oven to 350°F with rack in middle. Gen...   
4  Stir together brown sugar and hot water in 

Trenowanie modelu na podstawie kolumny **Cleaned_Ingredients** oraz pasującej do składników nazwy danego dania - reszta danych jest zwracana już dla klienta na podstawie odgadnietego tytułu.

Cel modelu: na bazie dostępnych składników klienta (input) model przewiduje jakie dania może zrobić (output: title + id)

wykorzystanie modelu: model zwraca tytuł i ID dania -> potem access już jest po ID do bazy danych aby otrzymać dokładny przepis

In [176]:
# do modelu nie są potrzebne kolumny: Ingredients, Instructions i Image_Name
# Dataset powinien zawierać: id (do identyfikacji w db, tylko do outputu); Title (docelowy output); <wszystkie kolumny odpowiadające składnikom, z zaznaczeniem 1/0 czy dany przepis je zawiera>
# używamy multilabel classification 
# polecajka n najlepszych przepisow 
# mozna dodac do outputu ilość brakujacych skladnikow (i jakie, ale jeszcze idk jak)

In [177]:
columns_to_drop = ["Ingredients", "Instructions", "Image_Name"]
data.drop(columns=columns_to_drop, inplace=True)
data.rename(columns={"Cleaned_Ingredients": "Ingredients"}, inplace=True)
print(data.head(5))

                                               Title  \
0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1                    Crispy Salt and Pepper Potatoes   
2                        Thanksgiving Mac and Cheese   
3                 Italian Sausage and Bread Stuffing   
4                                       Newton's Law   

                                         Ingredients  
0  [chicken, kosher salt, freshly ground black pe...  
1  [kosher salt, new potatoes about inch in diame...  
2  [elbow macaroni, garlic powder, onion powder, ...  
3  [heavy cream divided, olive oil divided, large...  
4  [apple butter storebought or homemade, hot wat...  


In [178]:
print(data['Ingredients'].apply(len))

0        21
1         7
2        10
3        13
4         6
         ..
13496    13
13497    10
13498     7
13499     6
13500    16
Name: Ingredients, Length: 13493, dtype: int64


In [179]:
for row in data['Ingredients']:
    print(f"Row type: {type(row)}, Content: {row}")

Row type: <class 'list'>, Content: ['chicken', 'kosher salt', 'freshly ground black pepper', 'unsalted butter room temperature', 'finely chopped rosemary', 'small red onion thinly sliced', 'unsalted chicken broth', 'allpurpose flour', 'finely chopped sage', 'small acorn squash about total', 'pinch of crushed red pepper flakes', 'dry white wine', 'apple cider vinegar', 'unsalted butter melted plus room temperature', 'ground allspice', 'white miso', 'medium apples such as gala or pink lady about total cored cut into pieces', 'loaf goodquality sturdy white bread torn into pieces about', 'extravirgin olive oil', 'freshly ground pepper', 'kosher salt divided plus more']
Row type: <class 'list'>, Content: ['kosher salt', 'new potatoes about inch in diameter', 'finely chopped rosemary', 'finely ground black pepper', 'large egg whites', 'finely chopped thyme', 'finely chopped parsley']
Row type: <class 'list'>, Content: ['elbow macaroni', 'garlic powder', 'onion powder', 'evaporated milk', 'mi

In [None]:
data['Ingredients'].map(type).unique()
unique_ingredients = set(ingredient for row in data['Ingredients'] for ingredient in row)
ingredient_columns = {}

# Loop through each ingredient and build a column in the dictionary
for ingredient in unique_ingredients:
    ingredient_columns[ingredient] = data['Ingredients'].apply(lambda x: 1 if ingredient in x else 0)

df = pd.concat([data, pd.DataFrame(ingredient_columns)], axis=1)

print(data.head(5))

                                               Title  \
0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1                    Crispy Salt and Pepper Potatoes   
2                        Thanksgiving Mac and Cheese   
3                 Italian Sausage and Bread Stuffing   
4                                       Newton's Law   

                                         Ingredients  
0  [chicken, kosher salt, freshly ground black pe...  
1  [kosher salt, new potatoes about inch in diame...  
2  [elbow macaroni, garlic powder, onion powder, ...  
3  [heavy cream divided, olive oil divided, large...  
4  [apple butter storebought or homemade, hot wat...  


In [182]:
print(df.head())

                                               Title  \
0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1                    Crispy Salt and Pepper Potatoes   
2                        Thanksgiving Mac and Cheese   
3                 Italian Sausage and Bread Stuffing   
4                                       Newton's Law   

                                         Ingredients  \
0  [chicken, kosher salt, freshly ground black pe...   
1  [kosher salt, new potatoes about inch in diame...   
2  [elbow macaroni, garlic powder, onion powder, ...   
3  [heavy cream divided, olive oil divided, large...   
4  [apple butter storebought or homemade, hot wat...   

   then transfer an airtight container and chill  \
0                                              0   
1                                              0   
2                                              0   
3                                              0   
4                                              0   

   package fr