In [53]:
### Index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.retrievers import SVMRetriever
from langchain_openai import OpenAIEmbeddings
from langchain_community.retrievers import SVMRetriever


# Blog posts that form the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load every page; each loader call yields its own list of documents.
docs = []
for url in urls:
    docs.append(WebBaseLoader(url).load())

# Flatten the per-URL lists into a single list of documents.
docs_list = []
for page_docs in docs:
    for item in page_docs:
        docs_list.append(item)

# Token-based splitter: 300-token chunks with no overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=0,
)
doc_splits = text_splitter.split_documents(docs_list)



In [88]:
### Generate

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser


from langchain_core.prompts import ChatPromptTemplate

from langchain_groq import ChatGroq


# Prompt
# Question-answering prompt with two placeholders: {question} and {context}.
# input_variables must name exactly the placeholders used in the template; the
# previous value listed "document", which does not appear in the template.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)



# Chat model served through Groq, configured with temperature=0.
# The API key is taken from the GROQ_API_KEY environment variable so that no
# credential is written into the notebook; when the variable is unset this
# falls back to the empty string the cell used before.
import os

llm = ChatGroq(
    temperature=0,
    model_name="llama3-8b-8192",
    groq_api_key=os.environ.get("GROQ_API_KEY", ""),
)



# Post-processing
def format_docs(docs):
    """Return the ``page_content`` of every document, separated by blank lines."""
    separator = "\n\n"
    contents = [entry.page_content for entry in docs]
    return separator.join(contents)

# Chain: the filled-in prompt goes to the LLM, and the LLM's reply is parsed
# into a plain string.
rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

# Run: default example question.
question = "agent memory"


In [89]:
from langchain_community.embeddings import GPT4AllEmbeddings
# Embedding backend with default settings; it is handed to Retriever, which uses
# it to embed both the document chunks and each incoming question.
embeddings = GPT4AllEmbeddings()

Found model file at  /Users/pradeep.borado/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


In [116]:
import numpy as np
from sklearn import svm
class Retriever():
    """Exemplar-SVM retriever over a fixed collection of documents.

    Every document is embedded and L2-normalized once at construction time.
    For each query a linear SVM is trained with the query as the single
    positive example and all documents as negatives; documents are then
    ranked by their signed distance to the resulting decision boundary.
    """

    def __init__(self, docs, embeddings):
        """Embed and normalize the documents.

        Args:
            docs: indexable sequence of objects exposing ``page_content``.
            embeddings: object providing ``embed_documents(list_of_str)`` and
                ``embed_query(str)``.
        """
        self.embeddings = embeddings
        self.docs = docs
        texts = [doc_split.page_content for doc_split in docs]
        if not texts:
            # Nothing to embed: keep an empty matrix so query() can short-circuit
            # instead of failing on the row-wise normalization.
            self.embeds = np.empty((0, 0))
            return
        embeds_np = np.array(embeddings.embed_documents(texts))
        self.embeds = self._l2_normalize(embeds_np)  # L2 normalize the rows, as is common

    @staticmethod
    def _l2_normalize(vectors):
        """Scale vectors to unit L2 norm along the last axis; zero vectors stay zero."""
        norms = np.sqrt((vectors ** 2).sum(axis=-1, keepdims=True))
        norms[norms == 0] = 1.0  # avoid 0/0 -> NaN for all-zero embeddings
        return vectors / norms

    def query(self, question, k=5):
        """Return up to ``k`` documents ranked most-similar-first for ``question``.

        Args:
            question: query text to embed.
            k: maximum number of documents to return.

        Returns:
            A list of elements of ``docs``; empty when there are no documents.
        """
        if len(self.embeds) == 0:
            return []

        query = self._l2_normalize(np.array(self.embeddings.embed_query(question)))
        x = np.concatenate([[query], self.embeds])
        y = np.zeros(len(x))
        y[0] = 1  # we have a single positive example, mark it as such

        # train our (Exemplar) SVM
        # docs: https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
        clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=50000, tol=1e-5, C=1)
        clf.fit(x, y)  # train

        # Score only the document rows. Scoring the query row as well and then
        # dropping the first ranked entry assumed the query always ranks first;
        # when it did not, index 0 survived and mapped to docs[-1], returning a
        # wrong document and discarding the real best match.
        similarities = clf.decision_function(self.embeds)
        ranked = np.argsort(-similarities)
        return [self.docs[i] for i in ranked[:k]]

In [117]:
# Build the retriever over the split chunks; the chunks are embedded once here,
# at construction time, using the embeddings object created above.
retriever = Retriever(doc_splits, embeddings)

In [118]:
# Retrieve the best-matching chunks for the question, then generate an answer.
q = 'What are the components of a agent system'
d = retriever.query(q)
# Hand the prompt plain text: format_docs joins each chunk's page_content,
# rather than passing the raw list of document objects as the context.
generation = rag_chain.invoke({"context": format_docs(d), "question": q})
print(generation)



The components of an agent system include:

1. Planning: Task decomposition and self-reflection.
2. Memory: Types of memory, such as Maximum Inner Product Search (MIPS), and memory stream.
3. Tool Use: Case studies, such as scientific discovery agent and generative agents simulation.


In [119]:
# Retrieve the best-matching chunks for the question, then generate an answer.
q = 'agent memory'
d = retriever.query(q)
# Hand the prompt plain text: format_docs joins each chunk's page_content,
# rather than passing the raw list of document objects as the context.
generation = rag_chain.invoke({"context": format_docs(d), "question": q})
print(generation)

The agent's memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language.


In [121]:
# Retrieve the best-matching chunks for the question, then generate an answer.
q='Explain Task Decomposition in depth?'
d = retriever.query(q)
# Hand the prompt plain text: format_docs joins each chunk's page_content,
# rather than passing the raw list of document objects as the context.
generation = rag_chain.invoke({"context": format_docs(d), "question": q})
print(generation)

Task decomposition is a process of breaking down a complex task into smaller, more manageable subtasks. This is often necessary when dealing with complex problems that require multiple steps to solve. In the context of LLM-powered autonomous agents, task decomposition is used to decompose a task into smaller, more manageable steps that the LLM can understand and execute. This can be done through various methods, such as using simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", or using task-specific instructions.


In [122]:
# Retrieve the best-matching chunks for the question, then generate an answer.
q='Explain chain of thought?'
d = retriever.query(q)
# Hand the prompt plain text: format_docs joins each chunk's page_content,
# rather than passing the raw list of document objects as the context.
generation = rag_chain.invoke({"context": format_docs(d), "question": q})
print(generation)

Chain of thought (CoT) prompting generates a sequence of short sentences to describe reasoning logics step by step, known as reasoning chains or rationales, to eventually lead to the final answer. This technique is more pronounced for complicated reasoning tasks, while using large models (e.g., with more than 50B parameters).
