In [None]:
import os

from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import SystemMessagePromptTemplate
from langchain.prompts import ChatPromptTemplate
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore')

load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')


In [11]:
loader = WikipediaLoader(query="Steve Jobs", load_max_docs=5)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
docs = text_splitter.split_documents(documents=documents)

In [12]:
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(model='gpt-4o')
vector_store = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="output")


In [13]:
llm.invoke("hi").content

'Hello! How can I assist you today?'

In [14]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000024267989B10>, search_kwargs={'k': 5})

In [15]:
def get_hyde_prompt(query):
    template="""Imagine you are an expert writing a detailed explanation on the topic: '{query}'
                create a hypothetical answer for the topic.
                """

    system_message_prompt = SystemMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    messages = chat_prompt.format_prompt(query=query).to_messages()

    print(messages)

    response = llm.invoke(messages)
    hypo_doc = response.content

    return hypo_doc


In [16]:
query = "Why was Steve Jobs fired from Apple?"
print(get_hyde_prompt(query=query))

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'Why was Steve Jobs fired from Apple?'\n                create a hypothetical answer for the topic.\n                ", additional_kwargs={}, response_metadata={})]
Steve Jobs, a visionary and co-founder of Apple Inc., was famously ousted from the company he helped create in 1985. This pivotal moment in the tech world was the result of a series of strategic, managerial, and personal conflicts that unfolded within Apple during the early 1980s. While the public knowledge outlines several key factors contributing to Jobs' departure, let's examine a hypothetical, more detailed explanation that might have played out behind the scenes.

### The Dynamics of Power and Vision

1. **Differing Visions:**
   By the mid-1980s, Apple's leadership was divided between two schools of thought. Steve Jobs, with his extraordinary vision for innovation and design, pushed for pioneering products like the Macintosh

In [17]:
matched_doc = retriever.invoke(get_hyde_prompt(query))
print(matched_doc)

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'Why was Steve Jobs fired from Apple?'\n                create a hypothetical answer for the topic.\n                ", additional_kwargs={}, response_metadata={})]
[Document(metadata={'title': 'Steve Jobs', 'source': 'https://en.wikipedia.org/wiki/Steve_Jobs', 'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen

In [18]:
matched_doc

[Document(metadata={'title': 'Steve Jobs', 'source': 'https://en.wikipedia.org/wiki/Steve_Jobs', 'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen Buddhism. He and Wozniak co-founded Apple in 1976 to further develop and sell Wozniak\'s Apple I personal computer. Together, the duo gained fame and wealth a year later with production and sale of the Apple II, one of the first highly successful mass-produce

### We have inbuilt library for HyDE
- HypotheticalDocumentEmbeder

In [20]:
import os

from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_community.document_loaders import WikipediaLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate
from langchain.prompts import ChatPromptTemplate
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore')

load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')


In [21]:
loader = TextLoader('langchain_crewai_dataset.txt')

docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = splitter.split_documents(docs)
len(chunks)

241

In [24]:
base_embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(model="gpt-4o")

hyde_embedding_function = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings=base_embeddings,
    prompt_key='web_search'
)

hyde_embedding_function

HypotheticalDocumentEmbedder(verbose=False, base_embeddings=OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000002426F366450>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000002426F367F10>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True), llm_chain=PromptTemplate(input_variables=['QUESTION'], input_types={}, partial_variables={}, template='P

According to the official documentation and LangChain source code (mapping in PROMPT_MAP), the default options are:
- web_search
- sci_fact
- arguana
- trec_covid
- fiqa
- dbpedia_entity
- trec_news
- mr_tydi

In [25]:
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=hyde_embedding_function,
    persist_directory="output/langchain"
)

In [27]:
rag_prompt = ChatPromptTemplate.from_template(
    """
    Use the context below to answer the question.

    Context:
    {context}

    Question:
    {question}
"""
)

rag_chain = create_stuff_documents_chain(llm, rag_prompt)
rag_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Use the context below to answer the question.\n\n    Context:\n    {context}\n\n    Question:\n    {question}\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000242679A6DD0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002426F1158D0>, root_client=<openai.OpenAI object at 0x00000242679B6A50>, root_async_client=<openai.AsyncOpenAI object at 0x00000242679B7B50>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| S

In [32]:
def hyde_rag_pipeline(query):
    matched_docs = vector_store.similarity_search(query)

    response = rag_chain.invoke({
        "question": query,
        "context": matched_docs
    })

    return response

In [33]:
query = "What memory modues does LangChain provides?"
answer = hyde_rag_pipeline(query)
print(answer)

LangChain provides memory modules like ConversationBufferMemory and ConversationSummaryMemory.
