In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from sklearn.metrics.pairwise import cosine_similarity



In [10]:
# Load the pre-built recipe feature table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so this file should
# only ever come from a trusted source.
nytc_features = pd.read_pickle('../data/nytc_features.pkl')
# Bare last expression so the notebook renders the frame as the cell output.
nytc_features

Unnamed: 0,recipe_name,cuisine,ingredient,category,ingredient_parsed
0,Crispy Potato Tacos,mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...","dinner, tacos, appetizer, main course","sea salt, potatoes, cheddar, handful of cilant..."
1,Street Corn Pudding,southern,"[Nonstick cooking spray, 1 large or 2 medium j...","brunch, dinner, lunch, custards and puddings, ...","nonstick cooking spray, creamed corn, kernel c..."
2,Gorditas de Maíz,mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"project, side dish","fresh fine-grind corn masa masa harina, sea salt"
3,Tortillas de Maíz,mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"project, side dish","fresh fine-grind corn masa masa harina, sea salt"
4,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"dinner, lunch, dumplings, project, side dish","fresh fine-grind corn masa masa harina, sea sa..."
...,...,...,...,...,...
8548,Pork Chops Provencal,french,"[2 tablespoons olive oil, or as desired, 3 pou...","dinner, main course","olive oil, pork chops, mushrooms, dry white wi..."
8549,Spinach Linguine With Tomato Sauce,italian,"[1/2 pound fresh or dried green linguine, Salt...","dinner, pastas, main course","fresh green linguine, salt, tomatoes, olive oi..."
8550,Lobster and Olive Pasta Salad,italian,[3/4 pound mixed fresh yellow and green fettuc...,"dinner, pastas, main course","fresh yellow and green fettuccine, pimento-stu..."
8551,Breaded Sweetbreads,french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...","project, appetizer","sweetbreads, egg, water, oil, salt, freshly gr..."


In [5]:
# TfidfVectorizer expects every document to be a string, so build a cleaned
# copy of the parsed-ingredient column: missing values become empty strings
# and every remaining value is cast to str. (The earlier self-assignment on
# this line was a no-op and performed no conversion.) The source frame is
# left untouched so re-running this cell is idempotent.
ingredient_docs = nytc_features['ingredient_parsed'].fillna('').astype(str)

# TF-IDF feature extractor: learn the vocabulary and encode every recipe
tfidf = TfidfVectorizer()
tfidf_encodings = tfidf.fit_transform(ingredient_docs)

# save the fitted tfidf model and the recipe encodings for later reuse
with open('./tfidf.pkl', "wb") as f:
    pickle.dump(tfidf, f)
with open('./tfidf_encodings.pkl', "wb") as f:
    pickle.dump(tfidf_encodings, f)

In [7]:
# Example query: a single comma-separated string of ingredient names that is
# later encoded with the fitted TF-IDF model.
test_ingredients = 'pork, onion, gochugaru, sugar'

In [11]:

# use our pretrained tfidf model to encode our input ingredients
ingredients_tfidf = tfidf.transform([test_ingredients])

# calculate cosine similarity between the test ingredients and every recipe
# in one vectorised call instead of one call per recipe row; the result has
# one row (the query) and one column per recipe
cos_sim = cosine_similarity(ingredients_tfidf, tfidf_encodings)
# flatten to a plain list of floats, one score per recipe, in recipe order
scores = cos_sim.ravel().tolist()

In [14]:
# Sanity check: number of similarity scores (one is produced per row of
# tfidf_encodings).
len(scores)

8553

In [18]:
def get_recommendations(N, scores, df_recipes=None):
    """Return the N recipes with the highest similarity scores.

    Parameters
    ----------
    N : int
        Maximum number of recommendations to return.
    scores : sequence
        One similarity score per recipe, aligned by position with the rows
        of the recipe table. Each entry may be a plain number or a
        single-element array (e.g. the 1x1 result of ``cosine_similarity``).
    df_recipes : pandas.DataFrame, optional
        Recipe table providing the ``recipe_name``, ``ingredient_parsed`` and
        ``cuisine`` columns. Defaults to the notebook-level ``nytc_features``.

    Returns
    -------
    pandas.DataFrame
        One row per recommendation, best match first, with the columns
        ``recipe``, ``ingredients``, ``score`` and ``cuisine``. ``score`` is
        the similarity formatted as a string with three decimals.
    """
    if df_recipes is None:
        # fall back to the recipe table loaded earlier in the notebook
        df_recipes = nytc_features

    def _as_float(value):
        # Single-element arrays expose .item(); calling float() on an array
        # with ndim > 0 is deprecated in recent NumPy releases.
        return value.item() if hasattr(value, 'item') else float(value)

    values = [_as_float(score) for score in scores]

    # positions of the N highest scores; ties keep their original order
    top = sorted(range(len(values)), key=values.__getitem__, reverse=True)[:N]

    # Build all rows first and create the frame once, rather than growing it
    # cell by cell. .iloc is used because `top` holds row positions, which
    # only coincide with index labels on a default RangeIndex.
    records = [
        {
            'recipe': df_recipes['recipe_name'].iloc[i],
            'ingredients': df_recipes['ingredient_parsed'].iloc[i],
            'score': "{:.3f}".format(values[i]),
            'cuisine': df_recipes['cuisine'].iloc[i],
        }
        for i in top
    ]
    return pd.DataFrame(records, columns=['recipe', 'ingredients', 'score', 'cuisine'])


In [19]:
# Show the 10 highest-scoring recipes for the test ingredients.
get_recommendations(10, scores)

Unnamed: 0,recipe,ingredients,score,cuisine
0,Cucumber Kimchi,"cucumbers, kosher salt, sugar, gochugaru (kore...",0.414,korean
1,Tongbaechu Kimchi (Whole Napa Cabbage Kimchi),"napa cabbage, kosher salt kosher salt, red app...",0.41,korean
2,Any Fish Jorim,"red radishes, soy sauce, sesame oil, garlic, f...",0.373,korean
3,Grilled Gochujang Pork With Fresh Sesame Kimchi,"gochugaru (korean red chile flakes), white vin...",0.307,korean
4,Kimchi Jjigae (Kimchi Soup),"fresh pork belly, garlic, ginger, soy sauce, s...",0.298,korean
5,Kimchi,"napa cabbage, kosher salt kosher salt (such as...",0.282,korean
6,Kimchi Jjigae With Ribs,"baby back ribs, ginger, unsalted butter, gochu...",0.274,korean
7,Whipped Berry Air Pudding,sugar,0.263,scandinavian
8,Raspberry Sauce (Sauce aux Framboise),sugar,0.263,french
9,‘Instant’ Kimchi With Greens and Bean Sprouts,"anchovy sauce, sesame oil, sugar, garlic, ging...",0.262,korean
