In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from sklearn.metrics.pairwise import cosine_similarity



In [4]:
# Load the NYT Cooking recipe table from a local pickle.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
nytc_features = pd.read_pickle('../data/nytc_training.pkl')
# Bare last expression -> rich preview of the frame (pandas elides the middle rows).
nytc_features

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema,calories,carbohydrates,cholesterol,fiber,protein,total_fat,unsaturated_fat,saturated_fat,trans_fat,sugar,sodium,ingredient_parsed
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re...",656,86,,12,22,27,14,10,0,8,1201,"sea salt, potatoes, cheddar, handful of cilant..."
1,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re...",316,30,,3,5,20,9,10,1,7,495,"nonstick cooking spray, creamed corn, kernel c..."
2,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas...","{'@context': 'http://schema.org', '@type': 'Re...",51,11,,1,2,1,1,0,0,4,137,"fresh fine-grind corn masa masa harina, sea salt"
3,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla...","{'@context': 'http://schema.org', '@type': 'Re...",24,5,,1,1,0,0,0,0,2,66,"fresh fine-grind corn masa masa harina, sea salt"
4,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...,"{'@context': 'http://schema.org', '@type': 'Re...",112,14,,4,4,5,3,2,0,2,198,"fresh fine-grind corn masa masa harina, sea sa..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8548,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...,"{'@context': 'http://schema.org', '@type': 'Re...",541,17,,4,54,27,16,8,0,8,1408,"olive oil, pork chops, mushrooms, dry white wi..."
8549,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t...","{'@context': 'http://schema.org', '@type': 'Re...",318,47,,3,9,11,7,3,0,5,424,"fresh green linguine, salt, tomatoes, olive oi..."
8550,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...,"{'@context': 'http://schema.org', '@type': 'Re...",519,48,,5,22,27,22,4,0,3,565,"fresh yellow and green fettuccine, pimento-stu..."
8551,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...,"{'@context': 'http://schema.org', '@type': 'Re...",544,20,,1,18,43,23,16,1,1,433,"sweetbreads, egg, water, oil, salt, freshly gr..."


TF-IDF Feature: ingredients only

In [5]:
# Preview the parsed-ingredient column: one comma-separated string per recipe.
# This is the text the TF-IDF vectorizer is fitted on in the next cell.
display(nytc_features['ingredient_parsed'])

0       sea salt, potatoes, cheddar, handful of cilant...
1       nonstick cooking spray, creamed corn, kernel c...
2        fresh fine-grind corn masa masa harina, sea salt
3        fresh fine-grind corn masa masa harina, sea salt
4       fresh fine-grind corn masa masa harina, sea sa...
                              ...                        
8548    olive oil, pork chops, mushrooms, dry white wi...
8549    fresh green linguine, salt, tomatoes, olive oi...
8550    fresh yellow and green fettuccine, pimento-stu...
8551    sweetbreads, egg, water, oil, salt, freshly gr...
8552    butter, shallots, onions, garlic, thyme, tomat...
Name: ingredient_parsed, Length: 8553, dtype: object

In [6]:
# TfidfVectorizer needs unicode or string input. No conversion is applied here;
# presumably every entry of the column is already a str (a NaN would make
# fit_transform fail) -- TODO confirm. The line below is an inactive placeholder.
# nytc_features['ingredient_parsed'] = nytc_features['ingredient_parsed']

# Fit a TF-IDF vocabulary on the ingredient strings (default settings) and encode
# every recipe as one sparse row, with one column per vocabulary term.
tfidf_extractor = TfidfVectorizer()
ingre_encodings = tfidf_extractor.fit_transform(nytc_features['ingredient_parsed'])

# Pickle-based persistence, currently disabled; the following cell saves the same
# two objects with joblib instead.
# with open('../model_checkpoints/tfidf_ingre.pkl', "wb") as f:
#      pickle.dump(tfidf_extractor, f)
# with open('../encodings/tfidf_ingre.pkl', "wb") as f:
#      pickle.dump(ingre_encodings, f)

In [7]:
import joblib
# Persist the fitted vectorizer and the encoded recipe matrix so they can be
# reloaded later without refitting. joblib.dump does not create missing
# directories, so both target folders must already exist.
joblib.dump(tfidf_extractor, '../model_checkpoints/tfidf_ingredients_model.pkl')
# As the last expression, this call's return value (the list of written paths)
# is what the cell displays.
joblib.dump(ingre_encodings, '../encodings/tfidf_ingredients_encodings.pkl')

['../encodings/tfidf_ingredients_encodings.pkl']

In [8]:
# Inspect the encoded matrix: one row per recipe, one column per vocabulary term.
ingre_encodings

<8553x3541 sparse matrix of type '<class 'numpy.float64'>'
	with 169607 stored elements in Compressed Sparse Row format>

Let's see which recipes are most similar to one another

In [12]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise dot products between all recipe encodings. TfidfVectorizer L2-normalises
# each row by default, so the plain dot product equals cosine similarity here.
# The result is a dense (n_recipes x n_recipes) array, which is memory-hungry.
recipe_cs = linear_kernel(ingre_encodings, ingre_encodings)

In [13]:
# Sanity check: the similarity matrix should be square (recipes x recipes).
recipe_cs.shape

(8553, 8553)

In [14]:
# Row positions of the 10 recipes most similar to the recipe at row 20.
# Negating the scores makes argsort (ascending) return them in descending order.
# The query recipe itself is expected to rank first, because its self-similarity
# is the maximum.
idx = (-recipe_cs[20]).argsort()[:10]
idx

array([  20, 5251, 4293, 1139, 3842, 3557, 1592, 8486, 3550, 1452])

In [15]:
# Look up the matched rows by position to eyeball whether the neighbours make sense.
nytc_features.iloc[idx]

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema,calories,carbohydrates,cholesterol,fiber,protein,total_fat,unsaturated_fat,saturated_fat,trans_fat,sugar,sodium,ingredient_parsed
20,https://cooking.nytimes.com/recipes/1021974-sa...,Saffron Fish With Red Peppers and Preserved Lemon,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, seafood, appetizer, main course","jewish, moroccan",[A few pinches to 1/2 teaspoon saffron strands...,[Pour 2 cups of boiling water into a bowl and ...,"{'@context': 'http://schema.org', '@type': 'Re...",118,10,,3,6,6,5,1,0.0,5,415,"saffron strands, olive oil, red bell peppers, ..."
5251,https://cooking.nytimes.com/recipes/6554-alger...,Algerian Spiced Striped Bass Tagine,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",mediterranean,"[7 tablespoons extra virgin olive oil, 1/2 cup...",[Place 4 tablespoons olive oil in blender with...,"{'@context': 'http://schema.org', '@type': 'Re...",346,12,,4,29,21,16,3,0.0,5,761,"extra virgin olive oil, cilantro, garlic, ging..."
4293,https://cooking.nytimes.com/recipes/2266-moroc...,Moroccan Tomato Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","easy, quick, salads and dressings",moroccan,"[3 or 4 ripe red tomatoes, about 3/4 pound, or...",[Cut and discard the tomato cores. Cut the tom...,"{'@context': 'http://schema.org', '@type': 'Re...",159,6,,3,2,15,12,2,0.0,3,401,"red tomatoes, celery, parsley, capers, cherry ..."
1139,https://cooking.nytimes.com/recipes/1021079-gr...,Greek Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, lunch, quick, weekday, salads an...",greek,"[2 tablespoons red wine vinegar, 1 small garli...","[In a bowl or small glass measuring cup, whisk...","{'@context': 'http://schema.org', '@type': 'Re...",229,9,,3,7,19,11,7,0.0,4,678,"red wine vinegar, garlic, oregano, kosher salt..."
3842,https://cooking.nytimes.com/recipes/10716-shri...,Shrimp And Pepper,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, quick, main course",brazilian,"[3 dozen medium-large shrimp in their shells, ...",[Add half the oil to a skillet and place over ...,"{'@context': 'http://schema.org', '@type': 'Re...",307,8,,2,7,28,23,4,,5,398,"olive oil, garlic, red bell peppers, red peppe..."
3557,https://cooking.nytimes.com/recipes/1013107-sp...,"Spicy Orange Salad, Moroccan Style","{'@context': 'http://schema.org', '@type': 'Nu...","easy, quick, weekday, salads and dressings",moroccan,"[3 large seedless oranges, 1/8 teaspoon cayenn...","[Peel the oranges, paring away all the exterio...","{'@context': 'http://schema.org', '@type': 'Re...",155,13,,3,1,11,9,2,0.0,9,298,"oranges, cayenne, paprika, garlic, olive oil, ..."
1592,https://cooking.nytimes.com/recipes/1019788-sl...,Slow Cooker Pork Puttanesca Ragù,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, weekday, sauces and gravies, main course",italian,"[3 to 3 1/2 pounds boneless, skinless pork sho...","[Using a sharp knife, trim and discard the lar...","{'@context': 'http://schema.org', '@type': 'Re...",530,16,,4,36,36,21,12,0.0,8,780,"pork shoulder, kosher salt, olive oil, garlic,..."
8486,https://cooking.nytimes.com/recipes/10430-hall...,Halloumi Tzaganaki,"{'@context': 'http://schema.org', '@type': 'Nu...",appetizer,greek,"[1 large (8-ounce) Vidalia onion, 2 tablespoon...",[Preheat a grill or broiler to high. Cut the o...,"{'@context': 'http://schema.org', '@type': 'Re...",221,13,,2,10,15,7,7,0.0,5,800,"extra-virgin olive oil, anchovy fillets in oil..."
3550,https://cooking.nytimes.com/recipes/1015497-ar...,Arroz de Verduras (Vegetable Rice),"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, weekday, main course, side dish",portuguese,"[2 large tomatoes, 1/2 cup finely chopped onio...",[For the sofrito: Bring a pan of water to a bo...,"{'@context': 'http://schema.org', '@type': 'Re...",447,66,,5,9,17,13,2,0.0,6,830,"tomatoes, onion, garlic, olive oil, saffron th..."
1452,https://cooking.nytimes.com/recipes/1020150-pr...,Pressure Cooker Pork Puttanesca Ragù,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, meat, pastas, main course",italian,"[3 to 3 1/2 pounds boneless, skinless pork sho...","[Using a sharp knife, trim and discard the lar...","{'@context': 'http://schema.org', '@type': 'Re...",571,17,,4,36,38,22,12,0.0,8,802,"pork shoulder, kosher salt and black pepper, o..."


Give it a test run with some ingredients

In [29]:
# Free-text ingredient query used for the smoke test.
test_ingredients = 'masa harina'

# Encode the query with the already-fitted TF-IDF model (transform only, no refit).
ingredients_tfidf = tfidf_extractor.transform([test_ingredients])

# Cosine similarity between the query and every recipe, computed in one vectorised
# call rather than one call per recipe row. `cos_sim` has shape (1, n_recipes);
# `scores` is a flat list holding one float per recipe, in row order.
cos_sim = cosine_similarity(ingredients_tfidf, ingre_encodings)
scores = cos_sim.ravel().tolist()

In [32]:
def get_recommendations(N, scores, df_recipes=None):
    """Return the N best-scoring recipes as a DataFrame.

    Parameters
    ----------
    N : int
        Maximum number of recommendations to return.
    scores : sequence
        One similarity score per recipe, aligned by position with the rows of
        the recipe table. Each entry may be a plain number or a size-1 array
        (for example the 1x1 result of ``cosine_similarity``).
    df_recipes : pandas.DataFrame, optional
        Recipe table providing the ``recipe_name``, ``ingredient_parsed``,
        ``cuisine`` and ``instruction`` columns. Defaults to the notebook-level
        ``nytc_features`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``recipe``, ``ingredients``, ``score``, ``cuisine`` and
        ``instruction``, ordered from highest to lowest score. ``score`` is a
        string formatted to three decimals.
    """
    if df_recipes is None:
        df_recipes = nytc_features  # pd.read_csv(config.PARSED_PATH)

    # Collapse every score to a plain float once. Calling float() directly on a
    # 1x1 ndarray is deprecated in recent NumPy releases, so unwrap via .item().
    flat_scores = [float(s.item()) if hasattr(s, 'item') else float(s) for s in scores]

    # Positions of the N highest scores; sorted() is stable, so ties keep row order.
    top = sorted(range(len(flat_scores)), key=flat_scores.__getitem__, reverse=True)[:N]

    # `top` holds row positions, so select with .iloc rather than by index label;
    # label lookup would pick the wrong rows on a filtered or re-indexed frame.
    picked = df_recipes.iloc[top]

    # Build the result in one shot instead of growing a frame cell by cell.
    return pd.DataFrame(
        {
            'recipe': picked['recipe_name'].tolist(),
            'ingredients': picked['ingredient_parsed'].tolist(),
            'score': ["{:.3f}".format(flat_scores[i]) for i in top],
            'cuisine': picked['cuisine'].tolist(),
            'instruction': picked['instruction'].tolist(),
        },
        columns=['recipe', 'ingredients', 'score', 'cuisine', 'instruction'],
    )


In [33]:
# Show the 10 recipes whose ingredients best match the test query.
get_recommendations(10, scores)

Unnamed: 0,recipe,ingredients,score,cuisine,instruction
0,Almost-From-Scratch Corn Tortillas,"masa harina, salt, vegetable oil, water, flour",0.872,mexican,[Combine the masa and salt in a bowl; stir in ...
1,Gorditas de Maíz,"fresh fine-grind corn masa masa harina, sea salt",0.764,mexican,"[If using fresh masa, knead the masa, 1/2 teas..."
2,Tortillas de Maíz,"fresh fine-grind corn masa masa harina, sea salt",0.764,mexican,"[Set out a 1-gallon zip-top freezer bag, a pla..."
3,Fresh Tortillas,masa,0.694,mexican,"[If you are making the masa, choose a good mas..."
4,Cheese Pupusas,"kosher salt, masa harina, water, industrial mo...",0.551,central american,"[In a large bowl, mix the salt well into the m..."
5,Mark Bittman’s Tamales,"dried corn husks, masa harina, chicken stock, ...",0.548,mexican,[Soak husks in warm water for at least 3 hours...
6,Mexican-Style Atole,"piloncillo cane sugar dark brown sugar, cinnam...",0.503,mexican,"[In a medium saucepan, combine 2 cups water wi..."
7,Chipotle Hamburgers On Gorditas,"chili peppers, salt, unsalted butter, masa har...",0.495,mexican,"[For the herb butter, in a small bowl, mash th..."
8,Cornmeal Beignets With Roasted Apples,"cinnamon, unsalted butter, apples, milk, unsal...",0.44,french,[Preheat oven to 350 degrees. Knead the cinnam...
9,Squash Blossom Quesadillas,"masa harina corn flour, salt, vegetable oil la...",0.42,mexican,[For the tortillas: combine the masa harina an...


In [22]:
# Spot-check the raw parsed-ingredient strings. Only the first 10 are shown:
# dumping the whole column floods the notebook output with thousands of entries.
nytc_features['ingredient_parsed'].head(10).to_list()

['sea salt, potatoes, cheddar, handful of cilantro, garlic, ground cumin, paprika, corn tortillas, neutral oil, tomatoes, red onion, of cilantro, fresh serrano or fresno chile (seeded, garlic, ground cumin, oregano, granulated sugar, sea salt, vegetable stock',
 'nonstick cooking spray, creamed corn, kernel corn, salted butter, granulated sugar, eggs, sour cream, milk, crema',
 'fresh fine-grind corn masa masa harina, sea salt',
 'fresh fine-grind corn masa masa harina, sea salt',
 'fresh fine-grind corn masa masa harina, sea salt, lard, white onion, garlic, chiles de árbol, fresh avocado leaves, black beans and their liquid a href " https cooking.nytimes.com recipes 1024131-frijoles-de-olla " frijoles de olla their liquid), crema',
 'black beans, white onion, garlic, dried or fresh avocado or bay leaves, fresh epazote sprigs or a combination of parsley, sea salt',
 'black tea bags, span a href " https cooking.nytimes.com recipes 1024366-lemonade " span lemonade span a, lemon, ice',
 '

TF-IDF Feature: cuisine + ingredients

In [None]:
# Tfidf needs unicode or string types
nytc_features['ingredient_parsed'] = nytc_features['ingredient_parsed']

# TF-IDF feature extractor
tfidf_extractor = TfidfVectorizer()
tfidf_encodings = tfidf_extractor.fit_transform(nytc_features['ingredient_parsed'])

# save the tfidf model and encodings
with open('../model_checkpoints/tfidf_ing.pkl', "wb") as f:
     pickle.dump(tfidf_extractor, f)
with open('../encodings/tfidf_ing.pkl', "wb") as f:
     pickle.dump(tfidf_encodings, f)

In [20]:
# Scratch cell: range(0) is empty, so the loop body never runs and nothing is
# printed. It has no effect on the analysis and can be deleted.
for i in range(0):
    print('hi')