# Indexing Recipe data with embeddings onto ElasticSearch and Query Search
We need a running Elasticsearch server. In this notebook we use the `elasticsearch` Python client to access a server started with the default settings (localhost, port 9200).

In [13]:
# Define the Elasticsearch mapping (schema) for the data we are going to index: id, dish (food-item name), cuisine, diet and recipe_embedding.

import json
import os

import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Mapping (schema) of the recipe index.
# `recipe_embedding` is a dense_vector field; it is mandatory to specify
# `dims`, which is the size of the embeddings stored in the field.
# The embeddings used here were trained with size 100.
newschema = {
    "mappings": {
        "properties": {
            "id": {"type": "text"},
            "dish": {"type": "keyword"},
            "cuisine": {"type": "keyword"},
            # The documents built for indexing also carry a `course` value,
            # so it is mapped explicitly instead of being left to dynamic mapping.
            "course": {"type": "keyword"},
            "diet": {"type": "keyword"},
            "recipe_embedding": {"type": "dense_vector", "dims": 100},
        }
    }
}

# Client for the Elasticsearch node at host 'localhost', port 9200
es = Elasticsearch(hosts=[dict(host='localhost', port=9200)])


In [14]:
# (Re)create the index 'recipedata1' using the mapping held in `newschema`.
# The delete runs first so that an index of that name left over from an
# earlier run is removed before the new one is created.
# NOTE(review): `ignore=400` on create presumably also hides mapping errors
# reported with HTTP 400 — check the returned response.
es.indices.delete(index="recipedata1", ignore=[400, 404])
es.indices.create(index="recipedata1", body=newschema, ignore=400)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'recipedata1'}

In [15]:
# Read the pickled recipe DataFrame and preview its first five rows.
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
recipeData = pd.read_pickle("processed/df_indianRecipes.pkl")
recipeData.head(n=5)

Unnamed: 0,TranslatedRecipeName,TranslatedIngredients,Cuisine,Course,Diet,TranslatedInstructions,URL,clean_ingredients,ingredient_count,clean_instructions,recipe_embedding_fasttext
0,Masala Karela Recipe,"6 karela (bitter gourd/ pavakkai) - deseeded,s...",Indian,Side Dish,Diabetic Friendly,"to begin making the masala karela recipe,de-se...",https://www.archanaskitchen.com/masala-karela-...,"[salt, gram flmy besan, turmeric powder haldi,...",8,"[[begin, making, masala, karela], [karela, sli...","[-0.016737932, 0.35984662, -0.24503584, -0.063..."
1,Spicy Tomato Rice (Recipe),"2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...",South Indian Recipes,Main Course,Vegetarian,"to make tomato puliogere, first cut the tomato...",http://www.archanaskitchen.com/spicy-tomato-ri...,"[tomato, bc belle bhat powder, salt, chickpea ...",10,"[[tomato, puliogere], [cut, tomatoes, mixer, g...","[0.034362823, 0.29279393, -0.15771821, -0.1977..."
2,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1-1/2 cups rice vermicelli noodles (thin),1 on...",South Indian Recipes,South Indian Breakfast,High Protein Vegetarian,"to begin making the ragi vermicelli recipe, fi...",http://www.archanaskitchen.com/ragi-vermicelli...,"[rice vermicelli noodle thin, pea matar, chill...",9,"[[begin, making, ragi, vermicelli], [firm, kee...","[0.025006209, 0.17075203, -0.1810968, -0.09942..."
3,Gongura Chicken Curry Recipe - Andhra Style Go...,"500 grams chicken,2 onion - chopped,1 tomato -...",Andhra,Lunch,Non Vegeterian,to begin making gongura chicken curry recipe f...,http://www.archanaskitchen.com/gongura-chicken...,"[gram chicken, chilly slit, turmeric powder ha...",12,"[[ients, aside, in, small, pan], [ium, heat], ...","[0.0039616567, 0.19029821, -0.08325197, -0.147..."
4,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"1 tablespoon chana dal, 1 tablespoon white ura...",Andhra,South Indian Breakfast,Vegetarian,"to make andhra style alam pachadi, first heat ...",https://www.archanaskitchen.com/andhra-style-a...,"[chana dal, white urad dal, chilly, es ginger ...",11,"[[andhra, style, alam, pachadi], [chillies], [...","[0.054038156, 0.27010298, -0.04160401, -0.1178..."


# Data ingestion scripts
- Converting data to JSON like format
- Connecting to Elasticsearch server using elasticsearch python api
- Creating a new index and indexing our data

In [16]:
# Convert every row of the DataFrame into a JSON-like dict and collect the
# dicts in a Python list; each dict is one document to be indexed.
ldocs = []
skipped_positions = []
# Rows are addressed by position, not by index label, so the loop also works
# when the DataFrame index is not the default 0..n-1 range.
for position, row in enumerate(recipeData.to_dict("records")):
    try:
        embedding = np.asarray(row["recipe_embedding_fasttext"], dtype=float)
    except (TypeError, ValueError):
        # value cannot be interpreted as a numeric vector
        skipped_positions.append(position)
        continue
    if embedding.ndim != 1 or embedding.size == 0 or not np.isfinite(embedding).all():
        # missing (NaN), empty or otherwise unusable embedding
        skipped_positions.append(position)
        continue
    ldocs.append({
        'id': str(position),
        'dish': row["TranslatedRecipeName"],
        'cuisine': row["Cuisine"],
        'course': row["Course"],
        'diet': row["Diet"],
        'recipe_embedding': embedding.tolist(),
    })

# Report dropped rows instead of discarding them silently.
if skipped_positions:
    print("Skipped %d row(s) without a usable embedding: %s"
          % (len(skipped_positions), skipped_positions))

# Take a look at one unit of data that will be indexed
ldocs[10]

{'id': '10',
 'dish': 'Homemade Baked Beans Recipe (Wholesome & Healthy)',
 'cuisine': 'Fusion',
 'course': 'High Protein Vegetarian',
 'diet': 'Vegetarian',
 'recipe_embedding': [0.049339815974235535,
  0.32872918248176575,
  -0.31305208802223206,
  -0.17112325131893158,
  0.0934978649020195,
  0.09800180047750473,
  -0.2998715341091156,
  0.28196626901626587,
  -0.5770992636680603,
  0.23351401090621948,
  -0.5348120927810669,
  0.19267436861991882,
  -0.05794635787606239,
  0.4129326641559601,
  0.10796818137168884,
  -0.10654804110527039,
  -0.12820935249328613,
  0.03483714535832405,
  -0.020380768924951553,
  -0.42462727427482605,
  -0.41584455966949463,
  0.2003045231103897,
  0.3545193076133728,
  -0.18771299719810486,
  -0.2513600289821625,
  0.15992297232151031,
  -0.22670400142669678,
  -0.019776202738285065,
  0.19192668795585632,
  -0.042548999190330505,
  0.1867438405752182,
  0.16027028858661652,
  -0.09517163038253784,
  0.006797747686505318,
  -0.01669739931821823,
  0

In [None]:
# Index all documents of `ldocs` into the index 'recipedata1'.
# The bulk helper sends the documents in batches instead of issuing one
# request per document. Each document's own 'id' is used as its
# Elasticsearch _id.
bulk_actions = (
    {"_index": "recipedata1", "_id": doc["id"], "_source": doc}
    for doc in ldocs
)
# Displays (number of successfully indexed documents, list of errors).
helpers.bulk(es, bulk_actions)

In [18]:
# Sanity check: run a search with an empty body against the created index
es.search(body={}, index="recipedata1")

{'took': 5,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2915, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'recipedata1',
    '_type': '_doc',
    '_id': '0',
    '_score': 1.0,
    '_source': {'id': '0',
     'dish': 'Masala Karela Recipe',
     'cuisine': 'Indian',
     'course': 'Side Dish',
     'diet': 'Diabetic Friendly',
     'recipe_embedding': [-0.016737932339310646,
      0.3598466217517853,
      -0.24503584206104279,
      -0.06312741339206696,
      0.04885884374380112,
      -0.06961071491241455,
      -0.3705177307128906,
      0.05110880732536316,
      -0.45738348364830017,
      0.26189756393432617,
      -0.596495509147644,
      0.2606188952922821,
      -0.14178690314292908,
      0.22382326424121857,
      -0.0400623083114624,
      -0.1123562902212143,
      -0.06315835565328598,
      0.08911527693271637,
      -0.03258634731173515,
      -0.3037600517272949,
      -0.267

# dense-vector based search in ElasticSearch
We have embeddings for all dishes/food items. Now, given a query at runtime, we have to compute its *query embedding*.
The query embedding is simply the *sentence embedding* of the query text.

The query is a sentence and we use the function *getSentenceEmbedding(sentence)* to get the query-embeddings. 
The function *getSimilarFood(query)* creates a custom query for elasticsearch using *script_score* 

In [28]:
from gensim.models import FastText

# Load the previously trained FastText model into `loaded_model`.
# The location can be overridden with the FASTTEXT_MODEL_PATH environment
# variable so the notebook is not tied to a single machine; the original
# absolute path stays the default.
MODEL_PATH = os.environ.get(
    "FASTTEXT_MODEL_PATH",
    r'C:\Users\arnab\Documents\workspace\food\whatscooking\models\model_indianfood_fasttext.model',
)
loaded_model = FastText.load(MODEL_PATH)

def getSentenceEmbedding(sentence, model=None):
    """Return the sentence embedding: the mean of the word vectors of its tokens.

    Parameters
    ----------
    sentence : iterable of str, or str
        Tokens of the sentence. A plain string is split on whitespace first,
        because iterating it directly would look up single characters.
    model : optional
        Object whose ``wv`` attribute maps a word to its vector. Defaults to
        the module-level ``loaded_model``.

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the vectors of all tokens the model could embed.

    Raises
    ------
    ValueError
        If the model has a vector for none of the tokens (this previously
        produced a 0/0 division and a NaN result).
    """
    if model is None:
        model = loaded_model
    if isinstance(sentence, str):
        sentence = sentence.split()

    word_vectors = []
    for word in sentence:
        try:
            word_vectors.append(model.wv[word])
        except KeyError:
            # no vector available for this token: leave it out of the mean
            continue

    if not word_vectors:
        raise ValueError("none of the tokens in the sentence has an embedding")
    return np.true_divide(sum(word_vectors), len(word_vectors))
def getSimilarFood(query, index="recipedata1", size=10):
    """Search the recipe index for dishes whose embedding is closest to the query.

    The query is split on whitespace, embedded with ``getSentenceEmbedding``
    and sent to Elasticsearch as a ``script_score`` query that ranks every
    document by cosine similarity between the query vector and the
    ``recipe_embedding`` field.

    Parameters
    ----------
    query : str
        Free-text query, e.g. ingredients and/or cooking procedures.
    index : str, optional
        Index to search. Defaults to 'recipedata1', the index this notebook
        creates and fills.
    size : int, optional
        Maximum number of hits to return (default 10).

    Returns
    -------
    pandas.DataFrame
        One column, 'Recommended Dishes!', holding the dish names of the hits
        in ranking order, with the word "recipe" removed from each name.
    """
    query_emb = getSentenceEmbedding(query.split())
    script_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                # + 1.0 shifts the cosine similarity so the score is never negative
                "source": "cosineSimilarity(params.query_vector, 'recipe_embedding') + 1.0",
                # plain list of floats so the request body is JSON-serialisable
                "params": {"query_vector": np.asarray(query_emb).tolist()},
            },
        }
    }
    response = es.search(
        index=index,
        # only the dish name is needed, so the stored embeddings are not fetched
        body={"size": size, "_source": ["dish"], "query": script_query},
    )

    # The values of the fields we need are in response['hits']['hits'].
    dishes = [
        " ".join(w for w in hit['_source']['dish'].split() if w.lower() != 'recipe')
        for hit in response['hits']['hits']
    ]
    return pd.DataFrame(dishes, columns=['Recommended Dishes!'])

# exploring the results 
Here we'll input ingredients (tomatoes, bread, spinach, etc) and procedures (bake, boil, fry, etc), 
and we hope to get suggested matching dishes while searching with dense embeddings in ElasticSearch

In [29]:
# Query made of ingredient names
getSimilarFood(query="flour yeast onions mozarella cheese")

Unnamed: 0,Recommended Dishes!
0,Whole Wheat Stuffed Breadsticks
1,Egg And Spinach Breakfast Pizza
2,Mexican Style Vegetarian Chimichanga Recipe-De...
3,Garlic Crust Vegetable Pizza
4,Veggie Pearl Millet Tart
5,Mini Chilli Cheese Aloo Kulcha
6,Whole Wheat Sun Dried Tomato Focaccia Bread | ...
7,Oven Crisped Burritos with Shredded Chicken
8,Mushroom Quiche
9,Chicken Crepe Lasagne


In [30]:
# Short two-word query
getSimilarFood(query="chicken tandoori")

Unnamed: 0,Recommended Dishes!
0,Paprika Chicken Skewers
1,Baked Paneer Corn Kebab
2,Chicken Tikka Taco Topped With Cheesy Garlic Mayo
3,Baked Fish Crisps (Fish Fry In Oven)
4,Beetroot Chicken Cutlets
5,Chicken Malai Kabab
6,Potato Roulade
7,Rosemary And Thyme Chicken
8,Spicy Kiwi Salsa with Feta Cheese
9,Crispy Vegetable Tempura


In [31]:
# Baked items: ingredients combined with the procedure "bake"
getSimilarFood(query="flour oil bake tomato cheese olive oregano")

Unnamed: 0,Recommended Dishes!
0,Halloumi Bruschetta with Tomato and Roasted Re...
1,Cheesy Vegetarian Pizza Muffins
2,Bloomed Potato Cheesy Muffins
3,Four Cheese and Spinach Stuffed Mushrooms
4,Roasted Vegetable Pasta Primavera
5,Chicken Crepe Lasagne
6,"Cauliflower Crust Pizza With Pesto, Sundried T..."
7,Spinach And Soya Sub Sandwich
8,Mini Pita Pizza With Tahini Roasted Vegetables
9,Roasted Eggplant Parmigiana


In [32]:
# "Palak" is the Hindi word for spinach
getSimilarFood(query="paneer tomatoes palak")

Unnamed: 0,Recommended Dishes!
0,Chatpata Lobia Chaat - Healthy Black Eyed Bean...
1,Spicy Paneer Bhurji Pav Sandwich - Breakfast
2,Oats Cheela Stuffed With Creamy Palak Paneer
3,Paneer Onion Paratha
4,Tofu Bhurji - Spicy Scrambled Tofu
5,Stuffed Bhindi With Paneer (Recipe In Hindi)
6,Bathua Aloo Paratha
7,Peshawari Karahi Gosht
8,Kabuli Chana & Moongphali Salad
9,Sweet & Spicy Stuffed Paneer In Kofta Curry


In [33]:
# Query made of ingredient names
getSimilarFood(query="milk strawberry banana")

Unnamed: 0,Recommended Dishes!
0,Chocolate Banana Smoothie Bowl
1,Strawberry Smoothie Bowl with chia seeds and M...
2,Oreo Milkshake
3,Banana And Mango Smoothie
4,Peanut Butter Oat Almond Energy Bar
5,Uttarakhand Style Singhal
6,Banana Digestive Pudding With Pistachios
7,Dragon Fruit and Rose Water Popsicle
8,Spinach Dates & Banana Smoothie
9,Overnight Oats (No Cook Blueberry Vanilla and ...


In [34]:
# Short free-text query
getSimilarFood(query="bhel puffed mix lemon")

Unnamed: 0,Recommended Dishes!
0,Watermelon Margarita Granita
1,Crunchy Carrots In Hot Dogs Buns
2,Healthy Fresh Vegetable Salad Bowl with Sour Y...
3,Sweet Potato & Rosemary Crisps/ Chips
4,Lemon Pudina Curry Leaf Shikanji
5,Kiwi Guava Spritzer
6,Spinach Watermelon Salad With Walnuts
7,Spinach & Apple Salad With Orange Dressing
8,Strawberry Mojito
9,Kiwi Basil Lemonade


# Thank you everyone!