In [1]:
import os
import warnings
from time import time

from dotenv import load_dotenv
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
import torch

warnings.filterwarnings('ignore')

In [2]:
# Load the source text file (decoded as UTF-8) through LangChain's TextLoader.
# The path is relative, so the notebook must be run from the directory that
# contains 'New Vegetarian Dishes.txt'.
loader = TextLoader('New Vegetarian Dishes.txt', encoding='utf-8')
documents = loader.load()

In [3]:
# Split the loaded documents into chunks of at most 100 characters, no overlap.
# NOTE(review): chunks this small carry very little context (the recorded
# texts[10] is just 'Language: English'), and the retriever later returns only
# k=2 of them per query -- consider a larger chunk_size / some overlap and
# re-run to confirm answer quality.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [4]:
# Number of chunks produced by the splitter.
len(texts)

2475

In [5]:
# Inspect one chunk (index 10) to sanity-check what the splitter produced.
texts[10]

Document(page_content='Language: English', metadata={'source': 'New Vegetarian Dishes.txt'})

In [6]:
# Call load_dotenv() before reading the token from the environment; the return
# value is assigned to `_` only to discard it.
_ = load_dotenv()

# Subscript access raises KeyError right here if the variable is not set.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Hosted LLM used by the QA chain.
# NOTE(review): several recorded answers further down stop mid-sentence; check
# whether the length cap passed in model_kwargs is what truncates them.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    model_kwargs=dict(temperature=0.2, max_length=256),
)

In [7]:
# Embedding model used to index the chunks. The device is "cuda" when PyTorch
# can see a GPU and "cpu" otherwise, so the cell also runs on machines without
# CUDA (on a GPU machine the behaviour is the same as hard-coding "cuda").
# NOTE(review): "BAAI/bge-base-en-v1.5" is a BGE checkpoint loaded through the
# Instructor wrapper -- confirm whether HuggingFaceBgeEmbeddings was intended.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
%%time
# Embed every chunk in `texts` and build the Chroma vector store, passing
# `persist_directory` so the store is tied to the 'db_HuggingFace' folder.
# (%%time must stay on the first line of the cell.)
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

# NOTE(review): the same directory is reused on every run -- confirm that
# re-running this cell does not append duplicate entries to an existing
# collection (duplicates would crowd out distinct chunks at k=2).
vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

CPU times: total: 5.23 s
Wall time: 10.7 s


In [9]:
# Expose the vector store as a retriever that returns k=2 chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
# Smoke-test the retriever, using the source file's name as the probe text
# (no stray quote/parenthesis characters inside the query string).
docs = retriever.get_relevant_documents("New Vegetarian Dishes")

In [10]:
# Number of chunks returned for the probe query (the retriever was built with k=2).
len(docs)

2

In [11]:
# Build the RetrievalQA chain ("stuff" chain type) on top of the hosted LLM and
# the Chroma retriever. Source documents are requested alongside the answer so
# the helper below can print their metadata.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [12]:
def process_llm_response(qa_chain, query):
    """Run ``query`` through ``qa_chain`` and print the answer, latency and source metadata.

    Args:
        qa_chain: Callable invoked as ``qa_chain(query)``. It must return a
            dict with a ``'result'`` entry and, optionally, a
            ``'source_documents'`` sequence whose items expose ``.metadata``.
        query: Question text, passed unchanged to ``qa_chain``.

    Returns:
        None. Everything is written to stdout.

    Raises:
        Whatever ``qa_chain(query)`` raises; errors from the chain are not
        caught here.
    """
    print(f"Query: {query}\n")
    started = time()
    llm_response = qa_chain(query)
    elapsed = time() - started
    print(f"Inference time: {round(elapsed, 3)} sec.")
    print("\nResult:", llm_response['result'])

    # The chain may return no source documents (or omit the key entirely when
    # it was built without return_source_documents). Report that explicitly
    # instead of raising IndexError/KeyError after the answer has been printed.
    source_documents = llm_response.get('source_documents') or []
    if source_documents:
        # Only the first retrieved document's metadata is shown.
        print("\nmetadata:", source_documents[0].metadata)
    else:
        print("\nmetadata: (no source documents returned)")

In [13]:
# Ask for two soup recipes from the book.
# NOTE(review): the recorded answer lists "lean bacon" as an ingredient, which
# looks inconsistent with a vegetarian cookbook -- verify the answer against the
# retrieved chunks. The query also misspells "recipes"; it is left unchanged so
# the recorded output still matches.
query = "2 soup receipes from this book"
process_llm_response(qa_chain, query)
# Author's note: the reported source metadata was correct.

Query: 2 soup receipes from this book

Inference time: 15.839 sec.

Result:  Here are the two soup recipes from the book:

No. 17.--Pea Soup.
Take one pound of dried peas, put them into a large saucepan, with two quarts of water, and let them boil till they are soft; then put in two onions, sliced, two carrots, sliced, two celery-stalks, sliced, and one leek, sliced; also one pound of lean bacon, cut into small pieces; let all these boil together till the vegetables are tender; then season with pepper and salt, and serve up.

No. 18.--Cauliflower Soup.
Take one head of cauliflower, break it into small florets, and put it into a saucepan with two quarts of water, and let it boil till tender; then put in two onions, sliced, two carrots, sliced, two celery-stalks, sliced, and one leek, sliced; also one pound of lean bacon, cut into small pieces; let all these boil together till the vegetables are tender; then season with pepper and salt, and serve up.

Note: Both of these soups seem to fo

In [25]:
# Ask for one main-dish recipe.
# NOTE(review): the recorded run of this cell failed with
# "ValueError: Error raised by inference API: Internal Server Error", and its
# execution count (25) is out of sequence with the neighbouring cells -- re-run
# the notebook from a fresh kernel before sharing.
query = "1 receipe of main dish"
process_llm_response(qa_chain, query)

Query: 1 receipe of main dish



ValueError: Error raised by inference API: Internal Server Error

In [15]:
# Ask for ten ingredients suitable for a new vegetarian.
query = "List out 10 ingredients that a new vegetarian could eat"
process_llm_response(qa_chain, query)

Query: List out 10 ingredients that a new vegetarian could eat

Inference time: 3.29 sec.

Result:  Some ingredients that a new vegetarian could consider eating include lentils, chickpeas, tofu, quinoa, spinach, kale, mushrooms, sweet potatoes, avocado, and broccoli. These foods are all rich in protein, fiber, and other essential nutrients that vegetarians need to maintain a healthy diet.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [16]:
# Ask for one recipe that uses both mushroom and tofu.
query = "1 receipe that use mushroom and tofu"
process_llm_response(qa_chain, query)

Query: 1 receipe that use mushroom and tofu

Inference time: 13.368 sec.

Result:  Mushroom and Tofu Stir-Fry

Ingredients:
- 1 block of firm tofu, drained and pressed
- 1 pound of mushrooms, sliced
- 1 red bell pepper, sliced
- 1 yellow onion, sliced
- 2 cloves of garlic, minced
- 1 tablespoon of grated ginger
- 2 tablespoons of soy sauce
- 2 tablespoons of oyster sauce
- 1 tablespoon of cornstarch
- 1 tablespoon of vegetable oil
- Salt and pepper to taste

Instructions:
1. Cut the tofu into cubes and set aside.
2. Heat the vegetable oil in a wok or large skillet over high heat.
3. Add the garlic and ginger and stir-fry for 30 seconds.
4. Add the mushrooms, bell pepper, and onion and stir-fry for 3-4 minutes or until the vegetables are tender.
5. Add the tofu and stir-fry for 1-2 minutes.
6. In a small bowl, whisk together the soy sauce, oyster sauce, and cornstarch.
7. Pour the sauce into the wok and stir-fry for 1-2 minutes or until the sauce has thickened.
8. Season with salt and p

In [17]:
# Ask for one dessert recipe.
query = "1 dessert recipe"
process_llm_response(qa_chain, query)
# Author's note: the first run of this cell produced a different answer.

Query: 1 dessert recipe

Inference time: 2.098 sec.

Result:  Unfortunately, based on the provided context, there is no dessert recipe included in the present book. The book seems to focus on savory dishes, specifically providing flavorings for sauces and stews.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [18]:
# Ask how to make Onion Soup.
# NOTE(review): the recorded answer calls for beef broth, which looks
# inconsistent with a vegetarian cookbook -- verify that the answer is actually
# grounded in the retrieved chunks.
query = "How to make Onion Soup?"
process_llm_response(qa_chain, query)

Query: How to make Onion Soup?

Inference time: 0.348 sec.

Result:  To make onion soup, sauté sliced onions in butter until they are caramelized. Add beef broth and simmer until the onions are tender. Season with salt, pepper, and thyme. Serve with crusty bread and melted cheese on top.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [19]:
# Ask for a specific quantity (flour for Whole Meal Biscuits) as a factual
# look-up check.
query = "How much whole flour need for making Whole Meal Biscuits?"
process_llm_response(qa_chain, query)

Query: How much whole flour need for making Whole Meal Biscuits?

Inference time: 0.334 sec.

Result:  4 ounces.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [20]:
# Ask for a recipe by its number.
query = "What is No. 54 receipe?"
process_llm_response(qa_chain, query)
# Author's note: although the chain can answer how many recipes the book
# contains, it still cannot answer this question.

Query: What is No. 54 receipe?

Inference time: 2.711 sec.

Result:  Without further context, it is impossible to determine what No. 54 receipe is. It could be a recipe for a dish, a chemical reaction, or any other type of instructional step-by-step process. More information is needed to accurately answer this question.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [21]:
# Ask for a main-dish recommendation that uses avocado.
query = "Recommend a main dish receipe that using avocado"
process_llm_response(qa_chain, query)

Query: Recommend a main dish receipe that using avocado

Inference time: 10.754 sec.

Result:  Avocado Chickpea Salad would be a great option for a vegetarian main dish. Here's the recipe:

Ingredients:
- 2 ripe avocados, diced
- 1 can chickpeas, drained and rinsed
- 1 red bell pepper, diced
- 1 small red onion, diced
- 1/2 cup cherry tomatoes, halved
- 1/4 cup chopped fresh parsley
- 1/4 cup chopped fresh cilantro
- 1/4 cup olive oil
- 2 tablespoons lemon juice
- 2 cloves garlic, minced
- Salt and pepper, to taste

Instructions:
1. In a large bowl, combine the avocados, chickpeas, red bell pepper, red onion, cherry tomatoes, parsley, and cilantro.
2. In a small bowl, whisk together the olive oil, lemon juice, garlic, salt, and pepper.
3. Pour the dressing over the salad and toss to combine.
4. Serve immediately or refrigerate until ready to serve.

Enjoy your delicious and healthy vegetarian main dish!

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [22]:
# Ask how many recipes the book contains.
query = "How many receipes does this book contain?"
process_llm_response(qa_chain, query)
# Author's note: the chain can finally answer this question.

Query: How many receipes does this book contain?

Inference time: 0.749 sec.

Result:  This book contains 221 receipes.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [23]:
# Show which search type the chain's retriever uses and which vector store backs it.
(qa_chain.retriever.search_type, qa_chain.retriever.vectorstore)

('similarity', <langchain.vectorstores.chroma.Chroma at 0x28241343510>)

In [24]:
# Print the prompt template the chain's LLM step uses, to see exactly how the
# retrieved context and the question are presented to the model.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:
