In [5]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# NOTE: the import below is disabled, so `ingredient_parser` and
# `get_and_sort_corpus` are not imported here. `custom` calls both, so the
# notebook cell that defines them must have been run before `custom` is used.
#from input_cleaner import ingredient_parser, get_and_sort_corpus
# Load the SentenceTransformer model used to embed the query text
bert = SentenceTransformer('bert-base-nli-mean-tokens')
 
# Load the recipe DataFrame from CSV
# NOTE(review): absolute, machine-specific path -- this only runs where that
# exact file exists.
df = pd.read_csv('/Users/ved/Desktop/Capstone/Dataset/string_ingredients.csv')
 
# Load precomputed sentence embeddings (path is relative to the working directory)
# `custom` takes row positions from this array and uses them with `df.iloc`, so
# it presumably holds one embedding per row of `df`, in the same order -- TODO confirm.
sentence_embeddings = np.load('sentence_embeddings.npy')
 
def custom(input_str, top_n=10):
    """Recommend the recipes most similar to a comma-separated ingredient string.

    The input is split on commas, cleaned with ``ingredient_parser``, sorted
    with ``get_and_sort_corpus``, re-joined with ", " and embedded with the
    module-level ``bert`` model. That embedding is compared by cosine
    similarity against every row of ``sentence_embeddings`` and the
    best-scoring rows of ``df`` are returned.

    Args:
        input_str: Ingredients separated by commas,
            e.g. ``"chicken, eggs, tomato"``.
        top_n: How many recipes to return (default 10). Must be at least 1.

    Returns:
        A DataFrame holding up to ``top_n`` rows of ``df``, ordered from most
        to least similar, with the index reset to 0..n-1.
        If anything fails, no exception is raised; instead the string
        ``"An error occurred: <message>"`` is returned, so callers should
        check the type of the result before indexing it.
    """
    try:
        # A slice like [:0] or [:-3] would silently return the wrong number
        # of rows, so reject non-positive sizes explicitly.
        if top_n < 1:
            raise ValueError("top_n must be at least 1")

        # Split the raw text into one entry per ingredient.
        ingredients = input_str.split(",")
        # Clean every ingredient and put them in a canonical (sorted) order.
        ingredients = ingredient_parser(ingredients)
        ingredients = get_and_sort_corpus(ingredients)
        query_text = ", ".join(ingredients)

        # Encode the cleaned ingredient text using BERT.
        input_embedding = bert.encode(query_text)

        # Compute cosine similarities between the input and all recipes;
        # the result has one row per recipe and a single column.
        similarities = cosine_similarity(sentence_embeddings, input_embedding.reshape(1, -1))

        # argsort is ascending, so reverse it and keep the first `top_n`
        # positions to get the most similar recipes first.
        top_indices = similarities.argsort(axis=0)[::-1][:top_n].flatten()

        # Create a DataFrame with the recommended recipes.
        recommendation_data = df.iloc[top_indices].reset_index(drop=True)
        return recommendation_data
    except Exception as e:
        return f"An error occurred: {str(e)}"
 
# Example input: a comma-separated ingredient string
input_str = "chicken, eggs , tomato, onion"
# `custom` returns a DataFrame of recommendations on success, or an
# "An error occurred: ..." string on failure, so `result` can be either type.
result = custom(input_str)
print(result)

                                TranslatedRecipeName  \
0               Chicken In Tomato Onion Gravy Recipe   
1                      Cheese Masala Omelette Recipe   
2  Thai Style Kai Jeow Moo Sab Recipe - Omelette ...   
3  Chicken Dimsums Recipe - Steamed Chicken Dumpl...   
4  Akuri Recipe (Parsi Style Seasoned Scrambled E...   
5                        Crispy Crab Rangoons Recipe   
6                  Japanese Chicken Udon Soup Recipe   
7  Mexican Breakfast Tortilla, Fried Eggs & Black...   
8  Mexican Chicken Burger Recipe With Sour Cream ...   
9                        Chicken And Egg Soup Recipe   

                               TranslatedIngredients  TotalTimeInMins  \
0  2 Green Chillies,2 teaspoons Ginger Garlic Pas...               30   
1  Salt - as required,2 tablespoons Cheese - grat...               25   
2  Sunflower Oil - for frying,4 Whole Eggs,Salt -...               20   
3  1 cup All Purpose Flour (Maida),1/2 tablespoon...               40   
4  3 tablespoon Fr

In [3]:
import pandas as pd
import ast
import re
import numpy as np

import string 
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
import nltk
#nltk.download('wordnet')

import unidecode
import nltk.corpus
from gensim.models import Word2Vec

from gensim.models import Word2Vec
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
import config

def ingredient_parser(ingredients):
    """Normalise raw ingredient strings into cleaned, lower-case phrases.

    Each entry is split on spaces and hyphens. Every token then has its
    punctuation stripped, is lower-cased, has accents removed and is
    lemmatized. English stop words, measuring words and common filler words
    are discarded, and the surviving tokens of an entry are re-joined with
    single spaces. Entries with no surviving tokens are dropped.

    Args:
        ingredients: Either a list of ingredient strings, or a string holding
            a Python list literal (parsed with ``ast.literal_eval``).

    Returns:
        list[str]: one cleaned phrase per input entry that still has content,
        in input order. Empty input gives an empty list.

    Raises:
        Whatever ``ast.literal_eval`` raises (e.g. ``ValueError`` or
        ``SyntaxError``) when a non-list argument is not a valid literal.
    """
    # measures and common words (already lemmatized)
    # NOTE(review): the trailing `...` is a literal Ellipsis element; it looks
    # like the full word lists were elided here -- confirm and restore them.
    measures = ['teaspoon', 't', 'tsp.', 'tablespoon', 'T', ...]
    words_to_remove = ['fresh', 'a', 'red', 'bunch', ...]

    # Turn ingredient list from string into a list
    if not isinstance(ingredients, list):
        ingredients = ast.literal_eval(ingredients)
    # Nothing to clean: return before loading any NLTK resources.
    if not ingredients:
        return []

    # Translation table that deletes every ASCII punctuation character.
    remove_punctuations = str.maketrans('', '', string.punctuation)

    # Tokens have their punctuation stripped before filtering, so measures
    # written with punctuation (e.g. 'tsp.') must also match in stripped form.
    measure_words = set()
    for measure in measures:
        measure_words.add(measure)
        if isinstance(measure, str):
            measure_words.add(measure.translate(remove_punctuations))
    filler_words = set(words_to_remove)

    # Loop-invariant resources: build them once instead of once per entry.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # NOTE(review): cleaned text now keeps tokens that used to be dropped
    # because of attached punctuation; artefacts built from the old output
    # may need to be regenerated.
    ingred_list = []
    for each_item in ingredients:
        # Split on hyphens as well as spaces first, so hyphenated words stay
        # separate tokens ...
        items = re.split(' |-', each_item)
        # ... then strip punctuation per token. str.translate returns a new
        # string, so the result has to be kept.
        items = [word.translate(remove_punctuations) for word in items]
        # Get rid of words containing non alphabet letters (and empty tokens)
        items = [word for word in items if word.isalpha()]
        # Turn everything to lowercase
        items = [word.lower() for word in items]
        # Remove accents
        items = [unidecode.unidecode(word) for word in items]
        # Lemmatize words so we can compare words to measuring words
        items = [lemmatizer.lemmatize(word) for word in items]
        # Drop stop words, measuring words and common filler words
        items = [
            word for word in items
            if word not in stop_words
            and word not in measure_words
            and word not in filler_words
        ]
        if items:
            ingred_list.append(' '.join(items))
    return ingred_list

# get corpus with the documents sorted in alphabetical order
def get_and_sort_corpus(data):
    """Sort ``data`` in place and return that same object.

    The caller's list is modified; no copy is made.
    """
    # .sort() works in place and returns None, so hand back `data` itself.
    data.sort()
    return data

In [17]:
# Number of entries in `actual_recipes`, the list the recommendations are
# compared against in the precision-at-K cell.
# NOTE(review): `actual_recipes` is not defined in any cell shown here; it must
# already exist in the kernel -- confirm where it is created.
len(actual_recipes)

122

In [18]:
# Show the recommended recipe names held in `result`.
# NOTE(review): the saved output of this cell is ordered differently from the
# table printed by the example cell, so `result` was presumably produced by a
# different run -- re-run the notebook top to bottom to confirm.
result['TranslatedRecipeName']

0                 Chicken In Tomato Onion Gravy Recipe
1    Chicken Dimsums Recipe - Steamed Chicken Dumpl...
2    Thai Style Kai Jeow Moo Sab Recipe - Omelette ...
3                        Cheese Masala Omelette Recipe
4    Akuri Recipe (Parsi Style Seasoned Scrambled E...
5                    Japanese Chicken Udon Soup Recipe
6    Mexican Breakfast Tortilla, Fried Eggs & Black...
7    Mexican Chicken Burger Recipe With Sour Cream ...
8                          Crispy Crab Rangoons Recipe
9                          Chicken And Egg Soup Recipe
Name: TranslatedRecipeName, dtype: object

In [19]:
# Inspect the match flags collected by the precision-at-K cell.
# NOTE(review): this cell's execution count (In [19]) is lower than that of the
# cell which assigns `correct_recommendations` (In [26]), so the saved output
# presumably comes from an earlier run -- confirm by running the cells in order.
correct_recommendations

[True, True]

In [26]:
K = 10  # cutoff rank: only the first K recommendations are scored

# Calculate P@K = (number of top-K recommendations that are relevant) / K.
# Walk the recommendations rather than the ground truth, so each recommended
# slot is counted at most once even if `actual_recipes` repeats a name, and
# use a set so each lookup is O(1).
relevant_names = set(actual_recipes)
top_k_names = result['TranslatedRecipeName'][:K]
# One True per recommended recipe that appears in the ground truth.
correct_recommendations = [True for name in top_k_names if name in relevant_names]
precision_at_K = sum(correct_recommendations) / K
# NOTE(review): the label below mentions Word2Vec/TF-IDF -- confirm it matches
# the recommender that actually produced `result`.
print(f"Precision at {K} using Word2Vec using TF-IDF vectorization: {precision_at_K}")


Precision at 10 using Word2Vec using TF-IDF vectorization: 0.2
