In [69]:
import json

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

In [70]:
model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(model_url)

In [98]:
df = pd.read_json("eda_receitas_data/receitas.json")

In [99]:
df

Unnamed: 0,directions,fat,date,categories,calories,desc,protein,rating,title,ingredients,sodium
0,"[1. Place the stock, lentils, celery, carrot, ...",7.0,2006-09-01 04:00:00+00:00,"[Sandwich, Bean, Fruit, Tomato, turkey, Vegeta...",426.0,,30.0,2.500,"Lentil, Apple, and Turkey Wrap","[4 cups low-sodium vegetable or chicken stock,...",559.0
1,[Combine first 9 ingredients in heavy medium s...,23.0,2004-08-20 04:00:00+00:00,"[Food Processor, Onion, Pork, Bake, Bastille D...",403.0,This uses the same ingredients found in boudin...,18.0,4.375,Boudin Blanc Terrine with Red Onion Confit,"[1 1/2 cups whipping cream, 2 medium onions, c...",1439.0
2,[In a large heavy saucepan cook diced fennel a...,7.0,2004-08-20 04:00:00+00:00,"[Soup/Stew, Dairy, Potato, Vegetable, Fennel, ...",165.0,,6.0,3.750,Potato and Fennel Soup Hodge,"[1 fennel bulb (sometimes called anise), stalk...",165.0
3,[Heat oil in heavy large skillet over medium-h...,,2009-03-27 04:00:00+00:00,"[Fish, Olive, Tomato, Sauté, Low Fat, Low Cal,...",,The Sicilian-style tomato sauce has tons of Me...,,5.000,Mahi-Mahi in Tomato Olive Sauce,"[2 tablespoons extra-virgin olive oil, 1 cup c...",
4,[Preheat oven to 350°F. Lightly grease 8x8x2-i...,32.0,2004-08-20 04:00:00+00:00,"[Cheese, Dairy, Pasta, Vegetable, Side, Bake, ...",547.0,,20.0,3.125,Spinach Noodle Casserole,"[1 12-ounce package frozen spinach soufflé, th...",452.0
...,...,...,...,...,...,...,...,...,...,...,...
20125,[Beat whites in a bowl with an electric mixer ...,2.0,2004-08-20 04:00:00+00:00,"[Mixer, Cheese, Egg, Fry, Cocktail Party, Parm...",28.0,,2.0,3.125,Parmesan Puffs,"[2 large egg whites, 3 oz Parmigiano-Reggiano,...",64.0
20126,[Bring broth to simmer in saucepan.Remove from...,28.0,2008-02-28 22:06:54+00:00,"[Side, Kid-Friendly, High Fiber, Dinner, Parme...",671.0,Cooking the artichokes with the rice infuses t...,22.0,4.375,Artichoke and Parmesan Risotto,"[5 1/2 cups (or more) low-salt chicken broth, ...",583.0
20127,"[Using a sharp knife, cut a shallow X in botto...",38.0,2005-10-21 18:21:20+00:00,"[Onion, Poultry, turkey, Vegetable, Bake, Kid-...",563.0,,31.0,4.375,Turkey Cream Puff Pie,"[1 small tomato, 1 small onion, finely chopped...",652.0
20128,[Heat 2 tablespoons oil in heavy medium skille...,24.0,2004-08-20 04:00:00+00:00,"[Milk/Cream, Citrus, Dairy, Fish, Garlic, Past...",631.0,"Sharon Hooykaas of Los Alamitos, California, w...",45.0,4.375,Snapper on Angel Hair with Citrus Cream,"[4 tablespoons olive oil, 4 shallots, thinly s...",517.0


In [100]:
df = df[df["title"].duplicated() == False].sort_values("title").drop(
    columns=["fat", "date", "calories", "protein", "sodium", "directions"]
).dropna().reset_index(drop=True)

In [101]:
df["ingredients"] = df["ingredients"].astype("string")
df["categories"] = df["categories"].astype("string")

In [102]:
df["categories"] = df["categories"].apply(literal_eval)
df["ingredients"] = df["ingredients"].apply(literal_eval)
df["documents"] = df["title"] + ' ' + df["desc"] + ' '+ df["ingredients"].str.join(' ')
# df["documents"] = df["documents"].replace('\d+', '', regex=True)

In [103]:
def embed(texts):
    return model(texts)

In [104]:
similarities = cosine_similarity(embeddings, embeddings)

In [105]:
embeddings = embed(df["documents"])

In [106]:
def recommend(text):
    emb = embed([text])
    neighbors = nn.kneighbors(emb, return_distance=False)
    return df['title'].iloc[neighbors[0]].tolist()

In [107]:
def get_recommended_recipes(ingredients, n_recommendations=3):
    # Cria uma string com os ingredientes fornecidos
    user_ingredients = ' '.join(ingredients)
    
    user_embeddings = embed([user_ingredients])
    
    # Calcula as similaridades entre os ingredientes fornecidos e as receitas
    similarities_with_user = cosine_similarity(user_embeddings, embeddings).flatten()
    
    # Obtém os índices das receitas mais similares
    similar_recipe_indices = similarities_with_user.argsort()[::-1][:n_recommendations]
    similar_recipe_scores = similarities_with_user[similar_recipe_indices]
    
    # Retorna os títulos das receitas recomendadas e os scores de similaridade
    recommended_recipes = receitas.loc[similar_recipe_indices, 'title'].tolist()
    recommendation_scores = list(similar_recipe_scores)
    
    return recommended_recipes, recommendation_scores, similar_recipe_indices

In [108]:
user_ingredients = ["Bacon, eggs, fast, cheese, ground beef"]  # Ingredientes fornecidos pelo usuário
recommendations, scores, recipe_indices = get_recommended_recipes(user_ingredients, 5)

# Imprime as receitas recomendadas e os scores
print('Recomendações:')
for recommendation, score, indice in zip(recommendations, scores, recipe_indices):
    print(f"{indice} - {recommendation} (Score: {score})")

Recomendações:
501 - Tamarind-Glazed Turkey Burgers  (Score: 0.4738388657569885)
3492 - Penne with Ragoût of Chili-Rubbed Short Ribs  (Score: 0.4697107672691345)
6162 - Cucumber Gin Cocktail  (Score: 0.4549691081047058)
519 - Red Wine-Braised Short Rib Stew with Potatoes, Carrots, and Mushrooms  (Score: 0.4311957359313965)
536 - Flourless Pistachio Cake with Strawberry Meringue  (Score: 0.428232342004776)
