In [1]:
! pip install -qU -r requirements.txt

In [57]:
from decouple import AutoConfig
from icecream import ic
import bs4
from langchain_groq import ChatGroq
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # if we define a variable in MessagePlaceholder in that variable only chat history will be stored
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain  #  can be used to create a chain out of retriever
from langchain.chains import create_history_aware_retriever # can be used to create a history aware retriever
from langchain.chains.combine_documents import create_stuff_documents_chain # combines all documents and sends it to PromptTemplate


In [3]:
config = AutoConfig(search_path='../notes-rag/')

HF_TOKEN = config("HF_TOKEN")
GROQ_API_KEY = config("GROQ_API_KEY")
LANGCHAIN_TRACING_V2 = config("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = config("LANGCHAIN_ENDPOINT")
LANGCHAIN_PROJECT = config("LANGCHAIN_PROJECT")
lANGCHAIN_API_KEY = config("LANGCHAIN_API_KEY")

In [26]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
llm = ChatGroq(model="llama3-8b-8192", api_key=GROQ_API_KEY)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

In [24]:
loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs= dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    )
)
documents = loader.load()

In [30]:
splits = text_splitter.split_documents(documents)

In [37]:
vector_store = Chroma.from_documents(splits, embedding=embeddings) # storing documents in vector database
retriever = vector_store.as_retriever() # converting vector store as a retriever runnable

In [39]:
system_prompt = (
    "you are an assistant for question - answering tasks. "
    "use the following pieces of retrieved context to answer "
    "the question. if you don't know the answer, say that you "
    "don't know. use three sentences maximum to keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [43]:
# here create_stuff_documents_chain combines all documents from retriever and pass it to the prompt as context
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [48]:
response = rag_chain.invoke({"input": "hey, what is self-reflection ?"})
print(response, end="\n\n")
print(response['answer'])

{'input': 'hey, what is self-reflection ?', 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.'), Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 3. Illustration of the Refl

In [55]:
response = rag_chain.invoke({"input": "how do we achieve it ?"})
print(response['answer']) # problem with this response is its not aware of what my initial question was.

According to the context, task decomposition in Tree of Thoughts can be achieved through three methods: (1) using a Large Language Model (LLM) with simple prompting, (2) using task-specific instructions, or (3) with human inputs.


#### Adding Chat history

In [62]:
contextualize_q_system_prompt = (
    "given a chat history and the latest user question "
    "which might reference context in the chat history "
    "formulate a standalone question which can be understood "
    "without the chat history. do not answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}")
])

In [63]:
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
question_answer_chain = create_stuff_documents_chain()