# Food.com recipe and interactions

For the dataset visit [https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions](https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions)

In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
from pymongo import MongoClient

In [3]:
db = MongoClient()['recipes']

In [4]:
rdb = db['recipe']
tags = [x['tags'] for x in rdb.find()]
ingredients = [x['ingredients'] for x in rdb.find()]

In [5]:
rdb.count_documents({})

231637

In [6]:
rdb.find_one()

{'_id': ObjectId('641486dcb890bb76b1765e10'),
 'name': 'arriba   baked winter squash mexican style',
 'id': 137739,
 'minutes': 55,
 'contributor_id': 47892,
 'submitted': datetime.datetime(2005, 9, 16, 0, 0),
 'tags': ['60-minutes-or-less',
  'time-to-make',
  'course',
  'main-ingredient',
  'cuisine',
  'preparation',
  'occasion',
  'north-american',
  'side-dishes',
  'vegetables',
  'mexican',
  'easy',
  'fall',
  'holiday-event',
  'vegetarian',
  'winter',
  'dietary',
  'christmas',
  'seasonal',
  'squash'],
 'nutrition': [51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0],
 'n_steps': 11,
 'steps': ['make a choice and proceed with recipe',
  'depending on size of squash , cut into half or fourths',
  'remove seeds',
  'for spicy squash , drizzle olive oil or melted butter over each cut squash piece',
  'season with mexican seasoning mix ii',
  'for sweet squash , drizzle melted honey , butter , grated piloncillo over each cut squash piece',
  'season with sweet mexican spice mix',
  'bak

## LDA for topic discovery

In [7]:
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel

### Ingredients

In [8]:
ingredients_dictionary = Dictionary(ingredients)
ingredients_corpus = [ingredients_dictionary.doc2bow(text) for text in ingredients]

In [9]:
lda = LdaModel(ingredients_corpus, num_topics=50, id2word=ingredients_dictionary)

In [13]:
for word_id, p in lda.get_topic_terms(2):
    print(ingredients_dictionary[word_id], p)

pineapple juice 0.09785636
crushed pineapple 0.08286322
baby carrots 0.05818951
fresh coarse ground black pepper 0.048560306
tuna in water 0.047297843
ground red pepper 0.046652816
sugar 0.0307647
unsweetened cocoa 0.029787276
dark sesame oil 0.029043335
water 0.028551007


In [14]:
for topic, words in lda.print_topics()[:4]:
    print(topic, words, '\n')

14 0.227*"unsalted butter" + 0.106*"granulated sugar" + 0.083*"light brown sugar" + 0.054*"peanut butter" + 0.047*"vanilla extract" + 0.037*"cocoa powder" + 0.036*"salt" + 0.036*"pure vanilla extract" + 0.031*"eggs" + 0.026*"sugar" 

41 0.134*"ground turkey" + 0.120*"molasses" + 0.111*"hot water" + 0.074*"ground pepper" + 0.063*"apple juice" + 0.046*"onion soup mix" + 0.038*"corn syrup" + 0.036*"great northern beans" + 0.031*"corn oil" + 0.029*"condensed cream of mushroom soup" 

49 0.090*"frozen chopped spinach" + 0.061*"whipped cream" + 0.060*"frozen spinach" + 0.059*"fresh basil leaf" + 0.052*"cottage cheese" + 0.050*"parmesan cheese" + 0.046*"asparagus" + 0.037*"lasagna noodles" + 0.035*"whole wheat pastry flour" + 0.032*"nutritional yeast" 

6 0.177*"egg whites" + 0.083*"red wine" + 0.079*"cornmeal" + 0.076*"barbecue sauce" + 0.073*"basil leaves" + 0.072*"coconut" + 0.062*"splenda sugar substitute" + 0.033*"dried marjoram" + 0.027*"vegetable shortening" + 0.026*"2% low-fat milk" 


### Tags

In [15]:
tags_dictionary = Dictionary(tags)
tags_corpus = [tags_dictionary.doc2bow(text) for text in tags]

In [16]:
tag = LdaModel(tags_corpus, num_topics=50, id2word=tags_dictionary)

In [17]:
for topic, words in tag.print_topics()[:4]:
    print(topic, words, '\n')

6 0.283*"condiments-etc" + 0.137*"sauces" + 0.086*"course" + 0.074*"time-to-make" + 0.072*"preparation" + 0.051*"savory-sauces" + 0.040*"number-of-servings" + 0.039*"15-minutes-or-less" + 0.039*"dietary" + 0.029*"easy" 

43 0.261*"appetizers" + 0.096*"course" + 0.089*"time-to-make" + 0.089*"preparation" + 0.070*"dips" + 0.064*"easy" + 0.044*"dietary" + 0.042*"dinner-party" + 0.039*"occasion" + 0.036*"15-minutes-or-less" 

14 0.357*"holiday-event" + 0.164*"occasion" + 0.085*"christmas" + 0.077*"dinner-party" + 0.051*"thanksgiving" + 0.048*"heirloom-historical" + 0.030*"valentines-day" + 0.026*"independence-day" + 0.025*"new-years" + 0.020*"easter" 

15 0.359*"european" + 0.312*"cuisine" + 0.030*"1-day-or-more" + 0.028*"english" + 0.028*"pizza" + 0.025*"preparation" + 0.025*"time-to-make" + 0.022*"course" + 0.019*"scandinavian" + 0.018*"canning" 

