In [1]:
import os
import getpass

# Ask for the OpenAI key without echoing it, unless one is already set in the
# environment. The guard keeps this cell safe to re-run and lets the notebook
# run non-interactively when the key is exported beforehand.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key:")

Enter your OpenAI API Key:··········


In [2]:
# Ask for the LangSmith key without echoing it, unless one is already set in
# the environment. The guard keeps this cell safe to re-run and lets the
# notebook run non-interactively when the key is exported beforehand.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass('Enter your LangSmith API key: ')

Enter your LangSmith API key: ··········


In [3]:
from uuid import uuid4

# Short random suffix (first 8 hex characters of a UUID4) so that every run of
# the notebook gets its own project name.
unique_id = uuid4().hex[:8]

# Tracing configuration, set in one step via environment variables.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": f"Managing_Agent_Prompt_Size_{unique_id}",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

In [5]:
pip install langchain langchain-openai langchain-community

Collecting langchain
  Downloading langchain-0.2.3-py3-none-any.whl (974 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.0/974.0 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-openai
  Downloading langchain_openai-0.1.8-py3-none-any.whl (38 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.4-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.5-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.7/314.7 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.76-py3-none-any.whl (124

In [6]:
from operator import itemgetter

from langchain.agents import AgentExecutor, load_tools
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper, DuckDuckGoSearchAPIWrapper
from langchain_core.prompt_values import ChatPromptValue
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

# Managing Prompt Length

- Agents dynamically use tools to gather information, adding the results to their prompts.

- This can lead to large prompts that exceed the model's context window.

- LCEL (LangChain Expression Language) lets you insert custom functions into an agent's chain to manage prompt size.

- Example: an agent that searches DuckDuckGo and Wikipedia for information, adding each (potentially long) result to its prompt.

- It's important to trim the prompt by keeping only necessary information and removing the rest.

In [10]:
pip install wikipedia duckduckgo-search

Collecting duckduckgo-search
  Downloading duckduckgo_search-6.1.5-py3-none-any.whl (24 kB)
Collecting pyreqwest-impersonate>=0.4.7 (from duckduckgo-search)
  Downloading pyreqwest_impersonate-0.4.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyreqwest-impersonate, duckduckgo-search
Successfully installed duckduckgo-search-6.1.5 pyreqwest-impersonate-0.4.7


In [11]:
# Wikipedia tool: configured for up to 10 results and at most 10,000 characters
# of document content, which is what makes the agent's prompt grow quickly.
wiki = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(
        top_k_results=10,
        doc_content_chars_max=10_000,
    ),
)

# DuckDuckGo tool: US-English region, at most 5 results.
# NOTE(review): time="d" presumably limits results to the past day - confirm
# against the DuckDuckGoSearchAPIWrapper documentation.
ddg_search = DuckDuckGoSearchRun(
    api_wrapper=DuckDuckGoSearchAPIWrapper(
        region="us-en",
        time="d",
        max_results=5,
    ),
)
# Second name bound to the same tool instance.
search = ddg_search

# Tools handed to the agent, in this order.
tools = [ddg_search, wiki]

In [12]:
# Prompt layout: a fixed system message, the user's question, then the
# agent scratchpad (the messages accumulated from earlier tool calls).
# The first two entries are the part of the prompt that never changes between
# agent steps; everything after them comes from "agent_scratchpad".
prompt = ChatPromptTemplate.from_messages(
    [
        # Persona for the model. ("Geoffrey" was previously misspelled "Geoffry".)
        ("system", """You are the world's greatest research assistant. You know exactly
        where and what to search for given a query. You've been a research assistant to
        people like Yann LeCun, Geoffrey Hinton, and Francois Chollet."""),
        # Filled from the "input" key of the chain input.
        ("user", "{input}"),
        # Expands to the list of messages supplied under "agent_scratchpad".
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Chat model used both to drive the agent and to count prompt tokens.
llm = ChatOpenAI(model="gpt-3.5-turbo")

In [13]:
# Baseline agent pipeline. Nothing in this chain limits the prompt size:
# every intermediate step is formatted and passed to the model as-is.
agent = (
    {
        # Pass the user's question through unchanged.
        "input": itemgetter("input"),
        # Convert the executor's intermediate steps into messages for the
        # "agent_scratchpad" placeholder of the prompt.
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    # Bind the tools to the model as callable functions.
    | llm.bind_functions(tools)
    # Parse the model response into the form the executor consumes.
    | OpenAIFunctionsAgentOutputParser()
)

# verbose=True prints the chain's progress (tool invocations and results).
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [14]:
# Run the baseline agent on a broad, multi-part question.
# NOTE: the recorded run of this cell stopped with a 429 RateLimitError
# ("insufficient_quota") right after entering the chain, i.e. an account
# quota problem rather than a prompt-size failure.
agent_executor.invoke(
    {
        "input": """Why does deep learning work? Is there some inherent property
        of a model's architecture that makes it capable of learning? Or is it more
        about the data it is trained on?  Are there any theorems or hypothesis that you can find that
        support this? Is there something special about how humans generate data?
        What philosophical implications does this have about why deep learning works.
        How will people in the future talk about what deep learning is?"""
    }
)



[1m> Entering new AgentExecutor chain...[0m


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
def condense_prompt(prompt: ChatPromptValue, max_tokens: int = 4_000) -> ChatPromptValue:
    """
    Trim a prompt so that its token count fits within ``max_tokens`` where possible.

    The first two messages of the prompt are always preserved (in this notebook's
    template they are the system message and the user's question). The remaining
    messages are dropped two at a time, oldest first, until the token count
    reported by the module-level ``llm`` is within the limit or there is nothing
    left to drop.

    Parameters:
    - prompt (ChatPromptValue): The input ChatPromptValue object containing a sequence of messages.
    - max_tokens (int, optional): The maximum number of tokens allowed for the condensed prompt. Defaults to 4,000.

    Returns:
    - ChatPromptValue: A new ChatPromptValue holding the two preserved messages followed by
      the messages that survived trimming.

    Note:
    - Token counts come from ``llm.get_num_tokens_from_messages``, so ``llm`` must already be
      defined when this function is called.
    - If the two preserved messages alone exceed ``max_tokens``, they are returned as-is and the
      result is still over the limit; this function never drops them.
    - Messages are removed in steps of two, presumably so that each function call stays with
      its result - confirm against ``format_to_openai_function_messages``.
    """
    messages = prompt.to_messages()
    preserved, droppable = messages[:2], messages[2:]
    num_tokens = llm.get_num_tokens_from_messages(messages)

    # Stop once nothing droppable is left: slicing an empty list yields an empty
    # list, so without this check an oversized preserved part would leave the
    # token count unchanged and the loop would never terminate.
    while num_tokens > max_tokens and droppable:
        droppable = droppable[2:]
        num_tokens = llm.get_num_tokens_from_messages(preserved + droppable)

    return ChatPromptValue(messages=preserved + droppable)


In [None]:
# Same pipeline as the baseline agent, with one extra step: condense_prompt
# sits between the prompt template and the model, so the rendered prompt is
# trimmed before every model call.
agent = (
    {
        # Pass the user's question through unchanged.
        "input": itemgetter("input"),
        # Convert the executor's intermediate steps into messages for the
        # "agent_scratchpad" placeholder of the prompt.
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    # Plain function used as a chain step; called with its default max_tokens.
    | condense_prompt
    | llm.bind_functions(tools)
    | OpenAIFunctionsAgentOutputParser()
)

# Rebinds agent_executor to an executor built around the trimming agent.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Ask the same multi-part question as in the baseline run.
agent_executor.invoke(
    {
        "input": """Why does deep learning work? Is there some inherent property
        of a model's architecture that makes it capable of learning? Or is it more
        about the data it is trained on?  Are there any theorems or hypothesis that you can find that
        support this? Is there something special about how humans generate data?
        What philosophical implications does this have about why deep learning works.
        How will people in the future talk about what deep learning is?"""
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `duckduckgo_search` with `{'query': 'Why does deep learning work?'}`


[0m

  with DDGS() as ddgs:


[36;1m[1;3mDeep learning is a branch of machine learning that models high level abstractions in data by using a deep graph with many processing layers. According to the Universal approximation theorem , deep-ness isn't necessary for a neural network to be able to approximate arbitrary continuous functions. A phrenological mapping of the brain by Friedrich Eduard Bilz.Phrenology, a pseudoscience, was among the first attempts to correlate mental functions with specific parts of the brain.. The mind (adjective form: mental) is that which thinks, imagines, remembers, wills, and senses, or is the set of faculties responsible for such phenomena. The mind is also associated with experiencing perception ... English is either the official language or one of the official languages in 59 sovereign states (such as in India, Ireland, and Canada). In some other countries, it is the sole or dominant language for historical reasons without being explicitly defined by law (such as in the United State

{'input': "Why does deep learning work? Is there some inherent property\n        of a model's architecture that makes it capable of learning? Or is it more\n        about the data it is trained on?  Are there any theorems or hypothesis that you can find that\n        support this? Is there something special about how humans generate data?\n        What philosophical implications does this have about why deep learning works.\n        How will people in the future talk about what deep learning is?",
 'output': "Deep learning works due to a combination of factors related to the model's architecture and the data it is trained on. One important aspect is the universal approximation theorem, which states that neural networks with a sufficient number of parameters can approximate any continuous function. This theorem"}