In [11]:
import pandas as pd
import numpy as np
import gensim
import random
from sklearn.feature_extraction.text import CountVectorizer
from gensim.matutils import Sparse2Corpus
from gensim.models.ldamodel import LdaModel
from gensim import models, similarities
from gensim.corpora import Dictionary

seed = 0

In [12]:
data = pd.read_pickle('data/raw_df.pkl')

In [13]:
def treat_ingredients(ing_list):
    output = []
    for ingredient in ing_list:
        ingredient_list = ingredient.split(' ')
        output.append("_".join(ingredient_list))
    return output

In [34]:
def get_similarity(lda, query_vector):
    index = similarities.MatrixSimilarity(lda[corpus])
    sims = index[query_vector]
    return sims

In [15]:
def train_lda_model(ingredients,num_topics = 100,passes = 15,random_state = seed):
    # get list of ingredients transformed
    ingredients_all = ingredients.apply(lambda x: treat_ingredients(x))
    
    #build dict (for Gensim vectorizer)
    dictionary = Dictionary([ing for ing in list(ingredients_all)])
    
    # build corpus
    corpus = [dictionary.doc2bow(text) for text in list(ingredients_all)]
    
    #train lda
    ldamodel = LdaModel(corpus,num_topics = num_topics, passes = passes,random_state = random_state, id2word = dictionary)
    return ldamodel,dictionary,corpus

In [17]:
sample_w = "butter, salt, pepper"

In [29]:
# define treat input function, returning a list of tokenized ingredients
def treat_words (words):
    list_words = words.split(",")
    output = []
    for w in list_words:
        output.append("_".join(w.strip().split(" ")))
    return output

In [35]:
def calculate_similarity(query,ldamodel,dct):
    # treat input words
    words_bow = dct.doc2bow(treat_words(query))
    query_vector = ldamodel[words_bow]
    
    #calculate ranking
    sim_rank = get_similarity(lda = ldamodel, query_vector = query)
    sim_rank = sorted(enumerate(sim_rank), key=lambda item: -item[1])
    
    return sim_rank

In [20]:
def calculate_recommendation(sim_rank,groups,n_reco = 10):
    results = [sim_rank[0][0]]
    results_prob = [sim_rank[0][1]]
    result_group = [sim_rank[0][1]]
        
    for recipe,group in zip(sim_rank[1:],groups[1:]):
        if group not in set(result_group):
            results.append(recipe[0])
            result_group.append(group)
            results_prob.append(recipe[1])
        if len(results) == n_reco:
            break
    print(result_group,"\n",results_prob)
    return results

In [32]:
# this is a wrapper function for calculate simu and calculate reco
def get_similarity_reco (query,ldamodel,dct,corpus,n_reco = 10):
    #calculate rank
    sim_rank = calculate_similarity(query,ldamodel,dct)
    #find groups according to lda model
    groups = []
    for l in ldamodel[corpus]:
        try:
            groups.append(l[0][0])
        except:
            groups.append(random.randint(1, 100))
            
    return calculate_recommendation(sim_rank,groups,n_reco)

In [23]:
def print_reco(results):
    return data.iloc[results]

In [24]:
def pretty_name (name):
    return " ".join([ word.capitalize() for word in name.split(" ") if word != ""])

In [25]:
ldamodel,dictionary,corpus = train_lda_model(data.ingredients)

In [27]:
query = "egg, cream, milk, fish"

In [None]:
results = get_similarity_reco (query, ldamodel, dct = dictionary, corpus = corpus,n_reco = 10)

In [None]:
print_reco(results)

[12, 60, 10, 1, 1, 49, 55, 2, 41, 14]

In [196]:
results = get_recommendation(sim_rank,groups)

[0.9521632, 60, 10, 1, 49, 55, 2, 41, 14, 43] 
 [0.9521632, 0.9430553, 0.93919826, 0.93579376, 0.9284587, 0.92755836, 0.9270968, 0.92068046, 0.9175818, 0.9174055]


In [186]:
results

[154035, 103402, 121804, 96941, 59028, 175016, 110783, 189900, 208529, 46195]

In [184]:
namelist = list(data.iloc[results].name)

In [197]:
[pretty_name(name) for name in namelist]

['Pasta With Kielbasa And Swiss Chard',
 'Healthy Chicken And Potato',
 'Lemon Lamb Soup',
 'Greek Like Potatoes',
 'Country Chicken And Vegetables Crock Pot',
 'Roasted Asparagus The Best I Ve Ever Tried',
 'Ina Garten Garlic Roast Chicken',
 'Smoked Sausage Green Beans And Potatoes',
 'Tangy Tamarind Chicken',
 'Chicken W Cream Of Mushroom Potatoes']

In [191]:
data.iloc[results]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
154035,pasta with kielbasa and swiss chard,456943,55,28087,2011-05-22,"[60-minutes-or-less, time-to-make, main-ingred...","[598.8, 41.0, 7.0, 48.0, 37.0, 40.0, 24.0]",11,"[cut out center ribs and stems from chard , th...",posting for zwt 7. this recipes comes from epi...,"[swiss chard, kielbasa, olive oil, salt, garli...",9
103402,healthy chicken and potato,301847,25,830692,2008-05-01,"[30-minutes-or-less, time-to-make, course, mai...","[236.0, 10.0, 12.0, 3.0, 36.0, 9.0, 8.0]",4,[spray a larg skillet with non-stick cooking s...,very healthy and low in fat and calories. chic...,"[mixed vegetables, chicken breasts, onion, pot...",4
121804,lemon lamb soup,214325,60,457852,2007-02-28,"[60-minutes-or-less, time-to-make, course, mai...","[92.5, 0.0, 3.0, 30.0, 5.0, 0.0, 7.0]",10,"[add water and salt and bring to boil, add lam...",this is a hearty soup that works every time. i...,"[shoulder lamb chops, water, fennel bulb, pota...",7
96941,greek like potatoes,322775,30,730273,2008-09-03,"[30-minutes-or-less, time-to-make, course, mai...","[293.2, 21.0, 6.0, 0.0, 9.0, 9.0, 13.0]",9,"[peel and cut potatoes into bite-sized pieces,...",an easy side dish you can make that tastes great!,"[potatoes, lemon, garlic cloves, salt, olive oil]",5
59028,country chicken and vegetables crock pot,31781,492,27643,2002-06-20,"[weeknight, time-to-make, course, main-ingredi...","[576.1, 40.0, 31.0, 29.0, 74.0, 54.0, 15.0]",9,"[cut chicken breasts in half crosswise, in cro...",wonderful creamy chicken without heating up th...,"[chicken breasts, potatoes, baby carrots, onio...",11
175016,roasted asparagus the best i ve ever tried,366336,15,1193707,2009-04-16,"[15-minutes-or-less, time-to-make, course, mai...","[73.5, 7.0, 7.0, 17.0, 7.0, 3.0, 2.0]",3,"[heat oven to 350f, in large bowl combine aspa...","i got this from a ""real simple"" magazine long ...","[asparagus, olive oil, salt, cracked black pep...",4
110783,ina garten garlic roast chicken,444373,135,640155,2010-12-17,"[time-to-make, course, preparation, main-dish,...","[1974.5, 166.0, 59.0, 26.0, 241.0, 166.0, 42.0]",13,"[as soon as you get the chicken home , salt it...",easy to make.,"[chicken, garlic, lemon, onions, carrots, pota...",8
189900,smoked sausage green beans and potatoes,98035,40,124190,2004-08-17,"[60-minutes-or-less, time-to-make, course, mai...","[583.9, 48.0, 43.0, 43.0, 44.0, 53.0, 18.0]",5,"[cut kielbasa into bite sized pieces, in large...","simple, but very good. i think this may be a m...","[kielbasa, potatoes, onion, green beans, black...",6
208529,tangy tamarind chicken,9002,30,6164,2001-05-15,"[30-minutes-or-less, time-to-make, course, mai...","[441.8, 42.0, 54.0, 20.0, 59.0, 36.0, 6.0]",7,"[heat oil in a deep pan or skillet, add black ...",it is tangy and hot and very very yummy!,"[chicken, tamarind pulp, spicy tomato sauce, o...",9
46195,chicken w cream of mushroom potatoes,31807,55,45012,2002-06-20,"[60-minutes-or-less, time-to-make, preparation...","[454.4, 21.0, 8.0, 4.0, 71.0, 19.0, 15.0]",10,"[cut up potatoes into medium size pieces, in b...",my mother would make this.,"[chicken breasts, fat-free cream of mushroom s...",4
