In [None]:
# install required packages
! pip install sentence-transformers
! pip install langchain
! pip install faiss-cpu

In [None]:
import json
import re
import pickle

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
# Embedding model shared by every FAISS index built in this notebook.
embedding_model=HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)
# Index over a single placeholder document.
# NOTE(review): presumably a smoke test of the embedding/FAISS setup — `db` is
# reassigned once the menu documents are available.
db=FAISS.from_documents(
    documents=[Document(page_content="McDonald")],
    embedding=embedding_model,
)

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Download the menu JSON. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() reports an HTTP error directly instead of
# letting an error page fail later as a confusing JSON decode error.
menu_response=requests.get("https://gist.githubusercontent.com/xapss/f1bc847ed57236c11f1e810095fa7555/raw/57bfab76abdecb0de83476fae953fac8b8c68378/menu.json",timeout=30)
menu_response.raise_for_status()
menu=menu_response.json()

In [None]:
# Keep only the regular item categories: drop the first and last top-level keys
# (Location and Menus). Menus needs different processing, so it is skipped here.
# NOTE(review): this relies on the JSON key order — confirm Location is first and Menus is last.
category_keys=list(menu)[1:-1]

In [None]:
def get_sub_menu_table(menu,key):
  """Flatten one menu category into a table with one row per item.

  Args:
    menu: Parsed menu mapping. ``menu[key]`` is expected to map each item id
      to a ``[name, price, extra]`` triple, where ``extra`` is a (possibly
      nested) dict of additional attributes.
    key: Category to extract; it is also written to the ``type`` column.

  Returns:
    A DataFrame with the columns ``id``, ``name``, ``price``, ``extra``, one
    column per flattened ``extra`` field (nested keys joined with "."), and
    ``type``. The index is a fresh 0..n-1 range.
  """
  # from_records on a dict yields one column per item id (ids sorted as
  # strings, so "D10" precedes "D2") and one row per field; transposing gives
  # one row per item.
  sub_menu=pd.DataFrame.from_records(menu[key]).T
  sub_menu.columns=['name','price','extra']
  sub_menu=sub_menu.rename_axis('id').reset_index()
  # Flatten all `extra` dicts in a single call instead of building and
  # concatenating one DataFrame per row; the result is already indexed 0..n-1,
  # so it lines up with sub_menu for the column-wise concat below.
  sub_menu_extra=pd.json_normalize(sub_menu['extra'].tolist())
  sub_menu=pd.concat([sub_menu,sub_menu_extra],axis=1)
  sub_menu['type']=key
  return sub_menu

In [None]:
# Stack the per-category tables into one frame indexed by item id.
menu_table=pd.concat([get_sub_menu_table(menu,category).set_index('id') for category in category_keys])

In [None]:
def get_documents(df:pd.DataFrame):
  """Turn each row of a menu table into a LangChain ``Document``.

  Args:
    df: Menu table that, after ``reset_index()``, has the columns ``id``,
      ``type``, ``name``, ``price`` and ``available``.

  Returns:
    A list with one ``Document`` per row. ``page_content`` is a one-line text
    rendering of those five fields, and ``metadata`` is the same five fields
    as a dict.
  """
  wanted_columns=['id','type','name','price','available']
  template="id:{id}, type:{type}, name:{name}, price: {price}, available: {available}"
  # reset_index() turns the index into a regular column so it can be selected.
  records=df.reset_index()[wanted_columns].to_dict(orient='records')
  return [Document(page_content=template.format(**record),metadata=record) for record in records]



In [None]:
# Build the FAISS index over one Document per menu_table row.
db=FAISS.from_documents(
    documents=get_documents(menu_table),
    embedding=embedding_model,
)

In [None]:
# Retrieve the menu entries closest to a free-text customer question
# (k=4 is the similarity_search default, spelled out here).
db.similarity_search(query="Hi, do you have cola?",k=4)

[Document(page_content='id:D2, type:Drinks, name:Pepsi, price: 2.8, available: False', metadata={'id': 'D2', 'type': 'Drinks', 'name': 'Pepsi', 'price': 2.8, 'available': False}),
 Document(page_content='id:D5, type:Drinks, name:Sourcy, price: 15, available: False', metadata={'id': 'D5', 'type': 'Drinks', 'name': 'Sourcy', 'price': 15, 'available': False}),
 Document(page_content='id:D7, type:Drinks, name:Guava, price: 15, available: False', metadata={'id': 'D7', 'type': 'Drinks', 'name': 'Guava', 'price': 15, 'available': False}),
 Document(page_content='id:D8, type:Drinks, name:Tea, price: 15, available: False', metadata={'id': 'D8', 'type': 'Drinks', 'name': 'Tea', 'price': 15, 'available': False})]

In [None]:
# Show only the rows in the Drinks category.
menu_table.loc[menu_table['type'].eq('Drinks')]

Unnamed: 0_level_0,name,price,extra,available,nutritionalInfo.kcal,nutritionalInfo.fat,nutritionalInfo.protein,nutritionalInfo.itemId,nutritionalInfo.allergens,type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
D1,Iced Tea,2.8,"{'nutritionalInfo': {'kcal': 340, 'fat': 17, '...",False,340.0,17.0,4.0,12.0,[wheat],Drinks
D10,Espresso,15.0,{'available': False},False,,,,,,Drinks
D11,Coffee,15.0,{'available': False},False,,,,,,Drinks
D12,Sisi,15.0,{'available': False},False,,,,,,Drinks
D13,Fernandes,15.0,{'available': False},False,,,,,,Drinks
D14,Lipton,15.0,{'available': False},False,,,,,,Drinks
D2,Pepsi,2.8,"{'nutritionalInfo': {'kcal': 170, 'fat': 10, '...",False,170.0,10.0,2.0,9.0,[dairy],Drinks
D3,7Up,2.8,"{'nutritionalInfo': {'kcal': 340, 'fat': 17, '...",False,340.0,17.0,4.0,10.0,[wheat],Drinks
D4,Fanta,2.8,"{'nutritionalInfo': {'kcal': 170, 'fat': 10, '...",False,170.0,10.0,2.0,11.0,[dairy],Drinks
D5,Sourcy,15.0,{'available': False},False,,,,,,Drinks
