# Basic Q&A with RAG

In [6]:
import bs4
from dotenv import load_dotenv
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tracers.context import tracing_v2_enabled

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import HuggingFaceEmbeddings

In [2]:
load_dotenv()

True

In [3]:
def build_retriever(embeddings, docs):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    retriever = vectorstore.as_retriever()
    return retriever

In [4]:
def crawl(urls):
    loader = WebBaseLoader(
        web_paths=urls,
        bs_kwargs=dict(
            parse_only=bs4.SoupStrainer(
                class_=("post-content", "post-title", "post-header")
            )
        ),
    )
    docs = loader.load()
    return docs

In [5]:
def build_chain(llm, retriever):
    format_docs = lambda docs: "\n\n".join(doc.page_content for doc in docs)
    prompt = hub.pull("rlm/rag-prompt")

    return (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )

In [7]:
with tracing_v2_enabled(project_name="Basic Q&A LLAMA2"):
    docs = crawl(
        [
            "https://lilianweng.github.io/posts/2023-06-23-agent/"
        ]
    )
    llama2 = ChatOllama(model="llama2", temperature=0)
    llama2_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    llama2_retriever = build_retriever(llama2_embeddings, docs)
    llama2_chain = build_chain(llama2, llama2_retriever)
    answer = llama2_chain.invoke("What is Task Decomposition?")
    print("\n\n", answer)   
    llama2_retriever.vectorstore.delete_collection()


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



 Task decomposition is the process of breaking down a complex task into smaller, manageable steps. This technique is often used in artificial intelligence and machine learning to improve the performance of models on difficult tasks. There are several methods for decomposing tasks, including using simple prompts like "Steps for XYZ," instructing the model to think step by step, or using task-specific instructions. The design of the system must work within limited communication bandwidth, and mechanisms like self-reflection can help learn from past mistakes. However, planning over a lengthy history and effectively exploring the solution space remain challenging for LLMs, which can struggle to adjust plans when faced with unexpected errors.


In [8]:
with tracing_v2_enabled(project_name="Basic Q&A GPT"):
    docs = crawl(
        [
            "https://lilianweng.github.io/posts/2023-06-23-agent/"
        ]
    )
    gpt = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)  #type: ignore
    gpt_embeddings = OpenAIEmbeddings()
    gpt_retriever = build_retriever(gpt_embeddings, docs)
    gpt_chain = build_chain(gpt, gpt_retriever)
    answer = gpt_chain.invoke("What is Task Decomposition?")
    print("\n\n", answer) 
    gpt_retriever.vectorstore.delete_collection()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




 Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through various methods such as using prompting techniques, task-specific instructions, or human inputs. The goal is to make the task more manageable and facilitate the interpretation of the model's thinking process.


Upon examining the LangSmith traces of the two runs I'll see that the docs retrieved are different in both cases. This can be explained by the fact that they both use different embedding dimensions.