In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import os 
from dotenv import load_dotenv
from time import time
import warnings
# Silence every Python warning for the rest of the session; this also hides
# deprecation notices, so remove it when debugging library behaviour.
warnings.filterwarnings('ignore')

In [2]:
# Read the cookbook text file (decoded as UTF-8) into LangChain documents.
loader = TextLoader(
    'New Vegetarian Dishes.txt',
    encoding='utf-8',
)
documents = loader.load()

In [3]:
# Split the loaded documents into chunks of at most 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

In [4]:
# Number of chunks produced by the splitter.
len(texts)

227

In [5]:
# Peek at a single chunk (index 10) to sanity-check the split.
texts[10]

Document(page_content="Firstly, the beans should invariably be washed and placed in a basin of\ncold water the night before they are required for use, and should remain\nin soak about ten or twelve hours. If left longer than this during hot\nweather they are apt to turn sour.\n\nThey should not be cooked in the same water that they have been soaked\nin.\n\nSoft water must be used to cook them. If this be not obtainable,\nMaignen's Ante-Calcaire will be found to render the water soft.\n\nSalt should not be added until they are at least half cooked, as its\ntendency is to harden them. This applies also to peas, lentils, etc.\n\nThey take about two hours to cook, or three if required very soft.\n\nThey must not be allowed to boil very fast, for, like potatoes, they are\nthen liable to break before becoming tender.\n\nAbout two pints of water, one ounce of butter, and one teaspoon of salt\nto half-pint of soaked beans, may be taken as a fair average.", metadata={'source': 'New Vegetarian D

In [6]:
# Load environment variables via python-dotenv; the return value is discarded.
_ = load_dotenv()

# Indexing os.environ directly raises KeyError when the token is not defined.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# LLM used by the QA chain, served through the Hugging Face Hub wrapper.
# NOTE(review): confirm the hosted endpoint honours "max_length"; some
# text-generation backends expect "max_new_tokens" instead.
llm = HuggingFaceHub(
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.2, "max_length": 256},
)

In [None]:
import torch
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# BAAI/bge-* checkpoints are BGE models, not INSTRUCTOR models, so they are
# loaded through LangChain's dedicated BGE wrapper.
# The variable keeps the name `instructor_embeddings` because later cells
# reference it.
instructor_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": embedding_device},
    # Unit-length vectors, as in the LangChain BGE usage example.
    encode_kwargs={"normalize_embeddings": True},
)

In [None]:
%%time
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

In [None]:
# Retriever that returns the 2 best-matching chunks for each query.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
# Smoke-test the retriever with the book title as the query text.
docs = retriever.get_relevant_documents("New Vegetarian Dishes")

In [None]:
# Number of documents the retriever returned for the smoke-test query.
len(docs)

In [None]:
# Question-answering chain over the retriever, built with the "stuff" chain
# type. return_source_documents=True makes the response carry the retrieved
# documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [None]:
def process_llm_response(qa_chain, query: str) -> None:
    """Run ``query`` through ``qa_chain`` and print the answer with its timing.

    Args:
        qa_chain: Callable invoked as ``qa_chain(query)``. It must return a
            dict with a ``'result'`` entry and, optionally, a
            ``'source_documents'`` list whose items expose ``.metadata``.
        query: The question passed to the chain.

    Prints the query, the elapsed wall-clock time of the call, the answer and
    the metadata of the first source document. When the response carries no
    source documents a placeholder line is printed instead of raising.
    """
    print(f"Query: {query}\n")
    started = time()
    llm_response = qa_chain(query)
    elapsed = time() - started
    print(f"Inference time: {round(elapsed, 3)} sec.")
    print("\nResult:", llm_response['result'])

    # The key may be missing or the list may be empty; indexing [0] blindly
    # would raise KeyError/IndexError in those cases.
    sources = llm_response.get('source_documents') or []
    if sources:
        print("\nmetadata:", sources[0].metadata)
    else:
        print("\nmetadata: (no source documents returned)")

In [None]:
# Ask for two soup recipes. The query text is what gets embedded for
# retrieval, so the keyword is spelled "recipes".
query = "2 soup recipes from this book"
process_llm_response(qa_chain, query)
# Author's note: the metadata printed for this answer looked wrong. Only the
# first retrieved document's metadata is printed.

In [None]:
# Ask for a single main-dish recipe. The query text is embedded for
# retrieval, so the keyword is spelled "recipe".
query = "1 recipe of main dish"
process_llm_response(qa_chain, query)

In [None]:
# Ask the chain for ten ingredients suitable for a new vegetarian and print
# the timed answer.
query = "List out 10 ingredients that a new vegetarian could eat"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask for one recipe combining mushroom and tofu. The query text is embedded
# for retrieval, so the keyword is spelled "recipe".
query = "1 recipe that use mushroom and tofu"
process_llm_response(qa_chain, query)

In [None]:
# Ask the chain for a single dessert recipe and print the timed answer.
query = "1 dessert recipe"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask the chain how Onion Soup is made and print the timed answer.
query = "How to make Onion Soup?"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask the chain for the flour quantity in Whole Meal Biscuits and print the
# timed answer.
query = "How much whole flour need for making Whole Meal Biscuits?"
process_llm_response(qa_chain, query=query)

In [None]:
# Look a recipe up by its number. The query text is embedded for retrieval,
# so the keyword is spelled "recipe".
query = "What is No. 54 recipe?"
process_llm_response(qa_chain, query)

In [None]:
# Ask for a main dish built around avocado. The query text is embedded for
# retrieval, so the keyword is spelled "recipe".
query = "Recommend a main dish recipe that using avocado"
process_llm_response(qa_chain, query)

In [None]:
# Ask how many recipes the book holds. The query text is embedded for
# retrieval, so the keyword is spelled "recipes".
query = "How many recipes does this book contain?"
process_llm_response(qa_chain, query)

In [None]:
# Show the retriever's search type together with the vector store behind it.
(qa_chain.retriever.search_type, qa_chain.retriever.vectorstore)

In [None]:
# Print the prompt template used by the chain's document-combining step.
print(
    qa_chain.combine_documents_chain.llm_chain.prompt.template
)