# GPT4All test - RAG Chat with local model
https://gpt4all.io/index.html

In [1]:
# Optional setup (shell form), left commented out: installs the gpt4all package.
# !pip install gpt4all 

In [2]:
# Optional setup via the %pip magic, left commented out; install output is
# redirected to /dev/null to keep the cell quiet.
# %pip install gpt4all > /dev/null

# Embedding

In [3]:
from langchain.embeddings import GPT4AllEmbeddings
# Embedding client created with the library defaults; the next cell calls
# embed_query() on it.
gpt4all_embd = GPT4AllEmbeddings()

In [4]:
# Embed one sample sentence, then report the vector length and its first
# 20 components as a quick sanity check.
text = "This is a test document."
query_result = gpt4all_embd.embed_query(text)

embedding_dim = len(query_result)
print(f'embed slen: {embedding_dim}')
print(query_result[:20])

embed slen: 384
[-0.04898764193058014, 0.12305509299039841, -0.04326639696955681, 0.0587812215089798, 0.009365170262753963, -0.0384075902402401, -0.07787521928548813, 0.0464746430516243, -0.024038562551140785, 0.05405070260167122, 0.09310438483953476, 0.019093262031674385, 0.006022939924150705, -0.0064134784042835236, -0.06747597455978394, -0.016789510846138, -0.021481089293956757, -0.045280035585165024, 0.0024571875110268593, 0.10313370078802109]


# Vector DB

In [5]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Download the source article that the rest of the notebook queries.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Cut the loaded page into chunks (chunk_size=500, no overlap between
# neighbouring chunks) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
all_splits = text_splitter.split_documents(data)

In [6]:
from langchain.vectorstores import Chroma

# Index every chunk in a Chroma vector store.
# The embedding client `gpt4all_embd` from the Embedding section is reused
# here rather than constructing a second GPT4AllEmbeddings(), so the
# embedding model is only instantiated once per kernel session.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=gpt4all_embd)

In [7]:
# Retrieve the chunks most similar to the question and preview the best hits.
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)
print(f'---Number of documents retrieved: {len(docs)}')

# Show at most the first two hits. Slicing (instead of indexing docs[0] and
# docs[1] directly) keeps the cell from raising IndexError when the search
# returns fewer than two documents.
for idx, doc in enumerate(docs[:2]):
    print(f'---Doc {idx} length: {len(doc.page_content)}')
    print(f'---Doc {idx}: {doc.page_content}')

---Number of documents retrieved: 4
---Doc 0 length: 252
---Doc 0: Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.
---Doc 1 length: 293
---Doc 1: Challenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.


# GPT4All Model

In [8]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.prompts import PromptTemplate

In [9]:
# Step-by-step prompt with a single {question} placeholder.
# NOTE(review): `prompt` is reassigned in the "simple chain" cell before any
# visible cell uses this value - confirm whether this cell is still needed.
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(input_variables=["question"], template=template)

In [10]:
from langchain.llms import GPT4All

# Location of the local GGUF weights, relative to the notebook's working directory.
MODEL_PATH = "../local_models/gpt4all/mistral-7b-openorca.Q4_0.gguf"

# NOTE(review): confirm what `max_tokens` bounds in the installed LangChain
# version (context window vs. generated length) before tuning it.
llm = GPT4All(max_tokens=2048, model=MODEL_PATH)

## Simple chain

In [11]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt: a single {docs} placeholder that receives the retrieved documents.
prompt = PromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)

# Chain
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Run
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)
# Use invoke() with an explicit input mapping: calling the chain object
# directly (llm_chain(docs)) is deprecated and emits a deprecation warning.
result = llm_chain.invoke({"docs": docs})

# Output: the generated summary is stored under the "text" key.
result["text"]

  warn_deprecated(


'\n\n1. Task decomposition can be done by LLMs with simple prompting, using task-specific instructions or human inputs.\n2. Challenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging for LLMs; they struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\n3. Task execution involves expert models executing on specific tasks and logging results. Instructions are given, and the correctness of task results is judged.'

## QA chain
- "stuff" approach: all retrieved documents are inserted ("stuffed") into a single prompt

In [13]:
from langchain import hub
from langchain.chains.question_answering import load_qa_chain

# Prompt: RAG prompt pulled from the LangChain hub.
rag_prompt = hub.pull("rlm/rag-prompt")

# Chain: chain_type="stuff" with the pulled prompt.
chain = load_qa_chain(llm, chain_type="stuff", prompt=rag_prompt)

# Run: invoke() replaces the deprecated direct call chain(...);
# return_only_outputs=True keeps the inputs out of the returned dict.
chain.invoke({"input_documents": docs, "question": question}, return_only_outputs=True)

{'output_text': ' The approaches to Task Decomposition are (1) using simple prompting with LLMs, (2) providing task-specific instructions, and (3) incorporating human inputs.'}

In [14]:
# Prompt 2 (special Llama prompt)
# Pulled from the LangChain hub; the bare name on the last line makes the
# notebook display the template so its [INST]/<<SYS>> wrapping can be inspected.
rag_prompt_llama = hub.pull("rlm/rag-prompt-llama")
rag_prompt_llama

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>> \nQuestion: {question} \nContext: {context} \nAnswer: [/INST]"))])

In [15]:
# Chain: same "stuff" QA chain, now driven by the Llama-formatted prompt.
chain = load_qa_chain(llm, chain_type="stuff", prompt=rag_prompt_llama)

# Run: invoke() replaces the deprecated direct call chain(...);
# return_only_outputs=True keeps the inputs out of the returned dict.
chain.invoke({"input_documents": docs, "question": question}, return_only_outputs=True)

{'output_text': ' The approaches to Task Decomposition are simple prompting, using task-specific instructions, and incorporating human inputs.'}

## Retrieval QA

In [16]:
from langchain.chains import RetrievalQA

# Expose the Chroma store as a retriever (default search settings) so the
# chain can fetch context for each query by itself.
retriever = vectorstore.as_retriever()

# Retrieval + answering in one chain, using the Llama-formatted RAG prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": rag_prompt_llama},
)

In [17]:
# Ask the first question through the retrieval chain. invoke() replaces the
# deprecated direct call qa_chain(...); the returned dict has the same shape.
qa_chain.invoke({"query": question})

{'query': 'What are the approaches to Task Decomposition?',
 'result': ' The approaches to Task Decomposition are simple prompting, using task-specific instructions, and incorporating human inputs.'}

In [18]:
# Second query against the same retrieval chain.
# NOTE(review): the query spells the framework "reflextion", while the model's
# answer spells it "Reflexion"; the string is left as-is so the recorded
# output still matches.
question2 = "How can the reflextion framework be used?"
# invoke() replaces the deprecated direct call qa_chain(...).
qa_chain.invoke({"query": question2})

{'query': 'How can the reflextion framework be used?',
 'result': " The Reflexion framework can be used to equip agents with dynamic memory and self-reflection capabilities for improving reasoning skills in a reinforcement learning setup. It achieves this by using a heuristic function that determines when trajectories are inefficient or contain hallucinations, and adding reflections into the agent's working memory as context for querying large language models (LLMs)."}