### HyDE (Hypothetical Document Embedder)

HyDE is primarily used in semantic search and information retrieval tasks to create more effective embeddings by simulating or hypothesizing what relevant documents might look like based on a given query.

In [1]:
import langchain
import torch
from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma



In [2]:
# Base embedding model that HyDE will use to embed text.
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

# Prefer the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': device},
    encode_kwargs=encode_kwargs
)
# Set up the LLM (default constructor arguments; credentials are presumably
# picked up from the environment — confirm for your setup).
llm = OpenAI()

  from tqdm.autonotebook import tqdm, trange
  warn_deprecated(


In [3]:
# Build the HyDE embedder: `llm` writes the hypothetical passage and
# `bge_embeddings` is the base embedding model. The prompt is selected
# by the `web_search` key.
embeddings = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings=bge_embeddings,
    prompt_key="web_search",
)

In [4]:
# Display the prompt template used by the HyDE chain's LLM step.
embeddings.llm_chain.prompt

PromptTemplate(input_variables=['QUESTION'], template='Please write a passage to answer the question \nQuestion: {QUESTION}\nPassage:')

In [5]:
# Turn on LangChain's debug flag so later cells print the LLM run trace
# (the `[llm/start]` / `[llm/end]` blocks seen in the search cell's output).
langchain.debug = True


In [6]:
# Directory holding the blog-post text files used as the retrieval corpus.
# Edit this single constant to point at your own copy of the dataset.
BLOG_POST_DIR = '/home/heliya/Desktop/rag_approaches/src/rag_approaches/dataset/blog_post'

# File names (relative to BLOG_POST_DIR) to load.
BLOG_POST_FILES = [
    'blog.langchain.dev_announcing-langsmith_.txt',
    'blog.langchain.dev_benchmarking-question-answering-over-csv-data_.txt',
    'blog.langchain.dev_chat-loaders-finetune-a-chatmodel-in-your-voice_.txt',
]

# One loader per file; the resulting paths are identical to the previous
# hard-coded ones.
loaders = [TextLoader(f'{BLOG_POST_DIR}/{file_name}') for file_name in BLOG_POST_FILES]

# Collect the documents produced by every loader into one flat list.
docs = []
for loader in loaders:
    docs.extend(loader.load())

# Split the loaded documents into chunks (chunk_size=1000, no overlap)
# before they are embedded.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

texts = text_splitter.split_documents(docs) #split

In [7]:
# Build a Chroma vector store from the chunks, with the HyDE embedder
# as the embedding function.
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)

# Run a similarity search for the question.
# NOTE: this rebinds `docs` (previously the loaded documents) to the
# search results.
query = "What are chat loaders?"
docs = docsearch.similarity_search(query=query)

[32;1m[1;3m[llm/start][0m [1m[llm:OpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Please write a passage to answer the question \nQuestion: What are chat loaders?\nPassage:"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:OpenAI] [2.31s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " Chat loaders, commonly known as chatbots, are computer programs designed to simulate conversation with human users through text or voice interactions. They are typically powered by artificial intelligence and are used in messaging applications, websites, and mobile apps to provide automated customer support, gather information, and perform tasks such as scheduling appointments or making reservations. Chat loaders have become increasingly popular in recent years due to their ability to provide efficient and personalized customer service, as well as their 24/7 availability. They are also used in various industries, including e-commerce, healthcare, and bank