In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from sklearn.metrics.pairwise import cosine_similarity



In [3]:
# Load the pre-computed recipe feature table from its pickle file.
# NOTE: unpickling executes arbitrary code, so only read files this project produced.
features_path = '../data/nytc_features.pkl'
nytc_features = pd.read_pickle(features_path)

# Leave the frame as the cell's last expression so the notebook renders it.
nytc_features

Unnamed: 0,recipe_name,cuisine,ingredient,category,ingredient_parsed
0,Crispy Potato Tacos,mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...","dinner, tacos, appetizer, main course","sea salt, potatoes, cheddar, handful of cilant..."
1,Street Corn Pudding,southern,"[Nonstick cooking spray, 1 large or 2 medium j...","brunch, dinner, lunch, custards and puddings, ...","nonstick cooking spray, creamed corn, kernel c..."
2,Gorditas de Maíz,mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"project, side dish","fresh fine-grind corn masa masa harina, sea salt"
3,Tortillas de Maíz,mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"project, side dish","fresh fine-grind corn masa masa harina, sea salt"
4,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"dinner, lunch, dumplings, project, side dish","fresh fine-grind corn masa masa harina, sea sa..."
...,...,...,...,...,...
8548,Pork Chops Provencal,french,"[2 tablespoons olive oil, or as desired, 3 pou...","dinner, main course","olive oil, pork chops, mushrooms, dry white wi..."
8549,Spinach Linguine With Tomato Sauce,italian,"[1/2 pound fresh or dried green linguine, Salt...","dinner, pastas, main course","fresh green linguine, salt, tomatoes, olive oi..."
8550,Lobster and Olive Pasta Salad,italian,[3/4 pound mixed fresh yellow and green fettuc...,"dinner, pastas, main course","fresh yellow and green fettuccine, pimento-stu..."
8551,Breaded Sweetbreads,french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...","project, appetizer","sweetbreads, egg, water, oil, salt, freshly gr..."


TFIDF Feature: ingredients only

In [6]:
# TfidfVectorizer requires every document to be a str. Build the corpus as an
# explicit string Series (missing values become empty documents); the previous
# self-assignment of the column converted nothing. The source frame is left
# unmodified so this cell stays idempotent on re-run.
ingredient_docs = nytc_features['ingredient_parsed'].fillna('').astype(str)

# TF-IDF feature extractor, fitted on the parsed ingredient strings.
# fit_transform returns one encoded row per recipe, in the same order as the frame.
tfidf_extractor = TfidfVectorizer()
tfidf_encodings = tfidf_extractor.fit_transform(ingredient_docs)

# Save the fitted tfidf model and the recipe encodings.
# NOTE: both target directories must already exist; open() does not create them.
with open('../model_checkpoints/tfidf_ing.pkl', "wb") as f:
    pickle.dump(tfidf_extractor, f)
with open('../encodings/tfidf_ing.pkl', "wb") as f:
    pickle.dump(tfidf_encodings, f)

In [36]:
test_ingredients = 'rice, clams'

# Encode the query with the vectorizer fitted above. The original cell called
# `tfidf.transform`, but no `tfidf` name is defined in this notebook, so it
# failed on a fresh kernel; the fitted model is `tfidf_extractor`.
ingredients_tfidf = tfidf_extractor.transform([test_ingredients])

# Cosine similarity between the query and every recipe encoding in a single
# vectorised call: the result has shape (1, n_recipes).
cos_sim = cosine_similarity(ingredients_tfidf, tfidf_encodings)

# Flatten to a plain list of floats, one score per recipe in row order, so that
# downstream code can index, sort and format the scores directly.
scores = cos_sim.ravel().tolist()

In [39]:
def get_recommendations(N, scores, df_recipes=None):
    """Return the N recipes with the highest similarity scores.

    Parameters
    ----------
    N : int
        Maximum number of recommendations to return. Values below zero are
        treated as zero.
    scores : sequence
        One similarity score per recipe, positionally aligned with the rows of
        ``df_recipes``. Every entry must be convertible with ``float()`` (a
        plain number or a single-element array).
    df_recipes : pandas.DataFrame, optional
        Recipe table providing the ``recipe_name``, ``ingredient_parsed`` and
        ``cuisine`` columns. Defaults to the notebook-level ``nytc_features``.

    Returns
    -------
    pandas.DataFrame
        Columns ``recipe``, ``ingredients``, ``score`` and ``cuisine``, ordered
        best match first and indexed 0..k-1. ``score`` is a string formatted to
        three decimal places.
    """
    if df_recipes is None:
        # fall back to the recipe dataset loaded earlier in the notebook
        df_recipes = nytc_features  # pd.read_csv(config.PARSED_PATH)

    # Normalise once so sorting and formatting both work on plain floats.
    flat_scores = [float(s) for s in scores]

    # Positions of the N highest scores; sorted() is stable, so ties keep
    # their original row order.
    top = sorted(
        range(len(flat_scores)),
        key=flat_scores.__getitem__,
        reverse=True,
    )[:max(N, 0)]

    # `top` holds row positions, so select with .iloc: label-based lookup
    # breaks as soon as the frame's index is not 0..n-1.
    picked = df_recipes.iloc[top]

    # Build the result in one shot instead of growing it cell by cell.
    return pd.DataFrame(
        {
            'recipe': picked['recipe_name'].tolist(),
            'ingredients': picked['ingredient_parsed'].tolist(),
            'score': ["{:.3f}".format(flat_scores[i]) for i in top],
            'cuisine': picked['cuisine'].tolist(),
        },
        columns=['recipe', 'ingredients', 'score', 'cuisine'],
    )


In [40]:
get_recommendations(10, scores)

Unnamed: 0,recipe,ingredients,score,cuisine
0,Clam Chowder,"clams, onion, potatoes, salt and freshly groun...",0.587,new england
1,Fried Razor Clams (or Steamers),"salt, razor clams clams, cornmeal, frying oil,...",0.546,new england
2,Pork Chops With Clams,"clams, salt, freshly ground black pepper, oil,...",0.524,new england
3,Clams In Sherry Sauce,"extra virgin olive oil, garlic, shallots onion...",0.451,new england
4,White Clam Topping,"garlic, olive oil, littleneck clams, oregano, ...",0.439,italian
5,Kimchi Gazpacho With Clams,"tomatoes, kimchi, kirby cucumber, garlic, rice...",0.428,spanish
6,Clam Chowder,"bacon, onion, potatoes, milk, clams, thyme, sa...",0.415,new england
7,Risotto of Clams,"cherrystone clams, water, olive oil, onion, ga...",0.379,italian
8,Clam-stuffed Sole Fillets (Filets de Sole Farc...,"salt, freshly ground pepper, butter, onion, gr...",0.364,french
9,Steamed Clams,"littleneck clams, unsalted butter, chorizo or ...",0.353,american


TFIDF Feature: cuisine + ingredients