In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create dataframe from parsed recipes file and cast ingredients to unicode for TFIDF to run
df = pd.read_csv('./recipes_parsed.csv', index_col='RecipeID')
df['RecipeIngredientParts'] = df['RecipeIngredientParts'].values.astype(str)


In [10]:
def buildModel (userInput):
    # Initialize CountVectorizer and fit model to user input
    cv = CountVectorizer(stop_words = 'english')
    cv_model = cv.fit_transform(userInput + df['RecipeIngredientParts'])
    
    # Determine cosine similarity score
    cos_sim = cosine_similarity(cv_model)
    
    input_sim = cos_sim[0][1:]
    
    recipe_ranking = sorted(enumerate(input_sim), key=lambda x: x[1], reverse=True)
    
    for rank, (doc_index, similarity) in enumerate(recipe_ranking):
        print(f"Rank {rank+1}: Document {doc_index+1}, Name {df['Name'].iloc[doc_index]}, Similarity: {similarity:.2f}")

In [11]:
buildModel("margarine flour egg vanilla buttermilk")

Rank 1: Document 387, Name Our Lasagna, Similarity: 0.93
Rank 2: Document 2886, Name Dessert Crepes with Strawberry Cream Filling, Similarity: 0.93
Rank 3: Document 3783, Name Fabulous Fat-Free Fruit Sorbet, Similarity: 0.93
Rank 4: Document 6829, Name Creamed Chipped Beef, Similarity: 0.93
Rank 5: Document 749, Name Indian Samosa, Similarity: 0.92
Rank 6: Document 2257, Name Horseradish Sauce, Similarity: 0.92
Rank 7: Document 2951, Name My Favorite King Ranch Chicken, Similarity: 0.92
Rank 8: Document 3306, Name Pork Tenderloin with Merlot-Shallot Sauce, Similarity: 0.92
Rank 9: Document 3322, Name Eric's Easy Grilled Chicken, Similarity: 0.92
Rank 10: Document 4754, Name The Last Peanut Butter Cookies Recipe You'll Ever Try, Similarity: 0.92
Rank 11: Document 5112, Name Peanut Butter Fudge, Similarity: 0.92
Rank 12: Document 5780, Name Stir-Fry Chicken With Garlic Sauce, Similarity: 0.92
Rank 13: Document 6339, Name Curried Sausages, Similarity: 0.92
Rank 14: Document 6900, Name Whi