# Food.com recipes and user interactions

The dataset is available on Kaggle: [https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions](https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions)

In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
from pymongo import MongoClient

In [3]:
# Open a client with pymongo's default connection settings, then select the
# `recipes` database that the rest of the notebook reads from.
mongo_client = MongoClient()
db = mongo_client['recipes']

In [4]:
rdb = db['recipe']

# Read the collection once and only pull the two fields we need.
# A single cursor keeps `tags[i]` and `ingredients[i]` referring to the same
# recipe and avoids transferring every full document twice.
tags, ingredients = [], []
for recipe in rdb.find({}, {'_id': 0, 'tags': 1, 'ingredients': 1}):
    tags.append(recipe['tags'])
    ingredients.append(recipe['ingredients'])

## LDA for topic discovery

In [5]:
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel

### Ingredients

In [6]:
# Build the token <-> id vocabulary from every recipe's ingredient list ...
ingredients_dictionary = Dictionary(ingredients)
# ... then encode each recipe as a bag-of-words vector over that vocabulary.
ingredients_corpus = list(map(ingredients_dictionary.doc2bow, ingredients))

In [7]:
# Fit a 50-topic LDA model on the ingredient bags-of-words.
# LDA training is stochastic; pinning `random_state` makes the discovered
# topics reproducible across kernel restarts.
lda = LdaModel(
    ingredients_corpus,
    num_topics=50,
    id2word=ingredients_dictionary,
    random_state=42,
)

In [9]:
# print_topics() yields (topic id, weighted-term string) pairs; show only the
# first four of them, each followed by a blank line.
ingredient_topic_summaries = lda.print_topics()
for topic, words in ingredient_topic_summaries[:4]:
    print(topic, words, '\n')

42 0.206*"green onion" + 0.088*"ginger" + 0.070*"tabasco sauce" + 0.059*"spinach" + 0.054*"chicken" + 0.045*"garlic" + 0.042*"dry sherry" + 0.035*"firm tofu" + 0.026*"oil" + 0.024*"oyster sauce" 

38 0.269*"kosher salt" + 0.065*"fresh ground black pepper" + 0.053*"olive oil" + 0.044*"lemons" + 0.038*"pork chops" + 0.032*"turkey" + 0.030*"unbleached flour" + 0.028*"fresh flat-leaf parsley" + 0.027*"roasted red pepper" + 0.026*"prepared horseradish" 

16 0.150*"margarine" + 0.124*"dry white wine" + 0.121*"cheese" + 0.119*"white pepper" + 0.046*"cool whip" + 0.035*"milk" + 0.033*"jalapenos" + 0.030*"american cheese" + 0.029*"butter" + 0.029*"long grain rice" 

29 0.131*"sour cream" + 0.128*"cheddar cheese" + 0.076*"onion" + 0.062*"butter" + 0.056*"milk" + 0.052*"salt" + 0.049*"pepper" + 0.048*"eggs" + 0.042*"ground beef" + 0.038*"monterey jack cheese" 



### Tags

In [10]:
# Build the token <-> id vocabulary from every recipe's tag list ...
tags_dictionary = Dictionary(tags)
# ... then encode each recipe as a bag-of-words vector over that vocabulary.
tags_corpus = list(map(tags_dictionary.doc2bow, tags))

In [11]:
# Fit a 50-topic LDA model on the tag bags-of-words.
# LDA training is stochastic; pinning `random_state` makes the discovered
# topics reproducible across kernel restarts.
tag = LdaModel(
    tags_corpus,
    num_topics=50,
    id2word=tags_dictionary,
    random_state=42,
)

In [12]:
# print_topics() yields (topic id, weighted-term string) pairs; show only the
# first four of them, each followed by a blank line.
tag_topic_summaries = tag.print_topics()
for topic, words in tag_topic_summaries[:4]:
    print(topic, words, '\n')

33 0.208*"fruit" + 0.125*"main-ingredient" + 0.105*"berries" + 0.059*"apples" + 0.056*"course" + 0.056*"preparation" + 0.055*"time-to-make" + 0.055*"dietary" + 0.053*"desserts" + 0.045*"low-in-something" 

0 0.149*"high-in-something" + 0.147*"high-protein" + 0.069*"low-carb" + 0.069*"dietary" + 0.067*"low-in-something" + 0.053*"main-ingredient" + 0.052*"preparation" + 0.048*"time-to-make" + 0.040*"meat" + 0.040*"easy" 

4 0.380*"number-of-servings" + 0.219*"for-large-groups" + 0.077*"oamc-freezer-make-ahead" + 0.067*"4-hours-or-less" + 0.054*"preparation" + 0.050*"time-to-make" + 0.041*"course" + 0.020*"weeknight" + 0.017*"dietary" + 0.016*"coffee-cakes" 

6 0.137*"15-minutes-or-less" + 0.096*"low-in-something" + 0.088*"easy" + 0.087*"low-protein" + 0.077*"dietary" + 0.074*"preparation" + 0.074*"time-to-make" + 0.073*"5-ingredients-or-less" + 0.064*"course" + 0.060*"low-sodium" 

