In [1]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = <langchain>

In [2]:
os.environ['OPENAI_API_KEY'] = <openai-key>

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
#### INDEXING ####

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [5]:
# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [6]:
# Embed
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

In [44]:
retriever = vectorstore.as_retriever()

#### RETRIEVAL and GENERATION ####

In [8]:
similarity = vectorstore.similarity_search(query="ANNOY")

In [17]:
answer = [content.page_content for content in similarity] 

In [24]:
(" ".join(answer)).replace("}", "").replace("]", "").replace("{", "")

'\n\nThen after these clarification, the agent moved into the code writing mode with a different system message.\nSystem message: \n\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations: Fig. 4. Experiments on AlfWorld Env and HotpotQA. Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: Shinn & Labash, 2023) Resources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.'

In [43]:
# Prompt
prompt = hub.pull("rlm/rag-prompt")

In [47]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [45]:
# LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [48]:
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x707a7842fa70>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x707a784319d0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')

In [46]:
# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


In [49]:
# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [50]:
rag_chain

{
  context: VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x707a97794e30>)
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x707a7842fa70>, async_client=<openai.resources.chat.completions.A

In [56]:
# Question
rag_chain.invoke("What is the Memory?")

'Memory is the processes used to acquire, store, retain, and later retrieve information. It includes sensory memory, short-term memory, and long-term memory. Short-term memory stores information currently needed for cognitive tasks, while long-term memory can store information for a long time with an unlimited capacity.'

In [65]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

In [58]:
import tiktoken

In [59]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [60]:
num_tokens_from_string(question, "cl100k_base")

8

In [61]:
embd = OpenAIEmbeddings()

In [62]:
embd

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x707a7842bf20>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x707a78434a40>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [63]:
query_result = embd.embed_query(question)

In [66]:
document_result = embd.embed_query(document)

In [67]:
len(query_result)

1536

In [68]:
import numpy as np

In [69]:
def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)


In [70]:
similarity1 = cosine_similarity(query_result, document_result)

In [72]:
print("Cosine Similarity:", similarity1)

Cosine Similarity: 0.8807044730847652


In [73]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

In [74]:
blog_docs = loader.load()

In [75]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [76]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)


In [77]:
# Make splits
splits = text_splitter.split_documents(blog_docs)

In [78]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

In [79]:
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

In [80]:
retriever = vectorstore.as_retriever()

In [81]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

In [82]:
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

In [83]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [86]:
docs = retriever.invoke("What is Task Decomposition?")

In [87]:
len(docs)

1

In [89]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [90]:
# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

In [91]:
prompt = ChatPromptTemplate.from_template(template)

In [92]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'))])

In [93]:
# LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [94]:
# Chain
chain = prompt | llm

In [95]:
chain

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x707a7869d370>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x707a78430650>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')

In [96]:
# Run
chain.invoke({"context":docs,"question":"What is Task Decomposition?"})

AIMessage(content='Task Decomposition is a technique used by agents to break down complex tasks into smaller and simpler steps, allowing them to plan ahead and effectively complete the overall task. This can be achieved through prompting techniques like Chain of Thought and Tree of Thoughts, which help the agent decompose the problem into manageable steps and explore multiple reasoning possibilities at each step. Task decomposition can be done using simple prompting by LLM, task-specific instructions, or with human inputs.', response_metadata={'token_usage': {'completion_tokens': 89, 'prompt_tokens': 331, 'total_tokens': 420}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-1da0b435-18fa-4c44-a00d-20e00f0db689-0', usage_metadata={'input_tokens': 331, 'output_tokens': 89, 'total_tokens': 420})

In [97]:
from langchain import hub