In [1]:
import os 
import openai 
from dotenv import load_dotenv 

load_dotenv() 

openai.api_key = os.getenv("OPENAI_API_KEY")  
model_name = "gpt-3.5-turbo"

In [2]:
from typing import List 
from pydantic import BaseModel, Field 
from langchain.utils.openai_functions import convert_pydantic_to_openai_function 

In [3]:
class Tagging(BaseModel): 
    """Tag the piece of text with particular info""" 
    sentiment: str = Field(description="sentiment of text, should be `pos`, `neg`, or `neutral`")
    language: str = Field(description="language of text (should be ISO 639-1 code)")
convert_pydantic_to_openai_function(Tagging)

{'name': 'Tagging',
 'description': 'Tag the piece of text with particular info',
 'parameters': {'properties': {'sentiment': {'description': 'sentiment of text, should be `pos`, `neg`, or `neutral`',
    'type': 'string'},
   'language': {'description': 'language of text (should be ISO 639-1 code)',
    'type': 'string'}},
  'required': ['sentiment', 'language'],
  'type': 'object'}}

In [4]:
from langchain.prompts import ChatPromptTemplate 
from langchain_openai import ChatOpenAI 

In [5]:
model = ChatOpenAI(temperature=0, model=model_name) 

In [6]:
tagging_functions = [convert_pydantic_to_openai_function(Tagging)]

In [7]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Think carefully, and then tag the text as instructed"),
    ("user", "{input}")
])

In [8]:
model_with_functions = model.bind(
    functions=tagging_functions,
    function_call={"name": "Tagging"}
) 
tagging_chain = prompt | model_with_functions 

In [9]:
tagging_chain.invoke({"input": "I love langchain"}) 

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"sentiment":"pos","language":"en"}', 'name': 'Tagging'}}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 108, 'total_tokens': 118}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-fee48ea3-082d-4f4f-9533-24643ef87cb5-0', usage_metadata={'input_tokens': 108, 'output_tokens': 10, 'total_tokens': 118})

In [10]:
tagging_chain.invoke({"input": "non mi piace questo cibo"})

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"sentiment":"neg","language":"it"}', 'name': 'Tagging'}}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 111, 'total_tokens': 121}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5850d6d8-13dc-4d8f-b78c-e05f89c359b0-0', usage_metadata={'input_tokens': 111, 'output_tokens': 10, 'total_tokens': 121})

In [13]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
tagging_chain = prompt | model_with_functions | JsonOutputFunctionsParser()

In [14]:
tagging_chain.invoke({"input": "non mi piace questo cibo"})

{'sentiment': 'neg', 'language': 'it'}

In [15]:
from typing import Optional 
class Person(BaseModel): 
    """Information about a person""" 
    name: str = Field(description="person's name") 
    age: Optional[int] = Field(description="person") 

In [16]:
class Information(BaseModel): 
    """Information to extract""" 
    people: List[Person] = Field(description="List of info about people")

In [17]:
convert_pydantic_to_openai_function(Information)

{'name': 'Information',
 'description': 'Information to extract',
 'parameters': {'$defs': {'Person': {'description': 'Information about a person',
    'properties': {'name': {'description': "person's name", 'type': 'string'},
     'age': {'anyOf': [{'type': 'integer'}, {'type': 'null'}],
      'description': 'person'}},
    'required': ['name', 'age'],
    'type': 'object'}},
  'properties': {'people': {'description': 'List of info about people',
    'items': {'description': 'Information about a person',
     'properties': {'name': {'description': "person's name", 'type': 'string'},
      'age': {'anyOf': [{'type': 'integer'}, {'type': 'null'}],
       'description': 'person'}},
     'required': ['name', 'age'],
     'type': 'object'},
    'type': 'array'}},
  'required': ['people'],
  'type': 'object'}}

In [18]:
extraction_functions = [convert_pydantic_to_openai_function(Information)]
extraction_model = model.bind(functions=extraction_functions, function_call={"name": "Information"})

In [19]:
extraction_model

RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000250EA337310>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000250EADE1910>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), kwargs={'functions': [{'name': 'Information', 'description': 'Information to extract', 'parameters': {'$defs': {'Person': {'description': 'Information about a person', 'properties': {'name': {'description': "person's name", 'type': 'string'}, 'age': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'description': 'person'}}, 'required': ['name', 'age'], 'type': 'object'}}, 'properties': {'people': {'description': 'List of info about people', 'items': {'description': 'Information about a person', 'properties': {'name': {'description': "person's name", 'type': 'string'}, 'age': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'description': 'person'}}, 'required': ['name', 'age'], 'type': 'object'}, '

In [20]:
extraction_model.invoke("Joe is 30, his mom is Martha")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"people":[{"name":"Joe","age":30},{"name":"Martha","age":null}]}', 'name': 'Information'}}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 93, 'total_tokens': 114}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c95b99e1-8368-4cfa-820f-a92e7646780e-0', usage_metadata={'input_tokens': 93, 'output_tokens': 21, 'total_tokens': 114})

In [22]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevat information, if not explicitly provided, do not guess. Extract partial info"), 
    ("human", "{input}")
]) 
extraction_chain = prompt | extraction_model 

In [23]:
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"}) 

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"people":[{"name":"Joe","age":30},{"name":"Martha","age":null}]}', 'name': 'Information'}}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 112, 'total_tokens': 133}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0c991168-03f7-4db3-8f1a-0cc3aa310722-0', usage_metadata={'input_tokens': 112, 'output_tokens': 21, 'total_tokens': 133})

In [24]:
extraction_chain = prompt | extraction_model | JsonOutputFunctionsParser() 
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"})


{'people': [{'name': 'Joe', 'age': 30}, {'name': 'Martha', 'age': None}]}

In [25]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser 

In [26]:
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="people")

In [27]:
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"})

[{'name': 'Joe', 'age': 30}, {'name': 'Martha', 'age': None}]

In [28]:
# Doing it for real 
from langchain.document_loaders import WebBaseLoader 
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
documents = loader.load() 

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [31]:
doc = documents[0] 
page_content = doc.page_content[:10000] 
print(page_content[:10000])







LLM Powered Autonomous Agents | Lil'Log







































Lil'Log






















Posts




Archive




Search




Tags




FAQ




emojisearch.app









      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


 


Table of Contents



Agent System Overview

Component One: Planning

Task Decomposition

Self-Reflection


Component Two: Memory

Types of Memory

Maximum Inner Product Search (MIPS)


Component Three: Tool Use

Case Studies

Scientific Discovery Agent

Generative Agents Simulation

Proof-of-Concept Examples


Challenges

Citation

References





Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general

In [32]:
class Overview(BaseModel):
    """Overview of a section of text."""
    summary: str = Field(description="Provide a concise summary of the content.")
    language: str = Field(description="Provide the language that the content is written in.")
    keywords: str = Field(description="Provide keywords related to the content.")

In [33]:
overview_tagging_function = [
    convert_pydantic_to_openai_function(Overview)
]
tagging_model = model.bind(
    functions=overview_tagging_function,
    function_call={"name": "Overview"} 
)
tagging_chain = prompt | tagging_model | JsonOutputFunctionsParser()

In [34]:
tagging_chain.invoke({"input": page_content})

{'summary': 'This article discusses building autonomous agents powered by LLM (large language model) as the core controller, focusing on planning, memory, and tool use components.',
 'language': 'English',
 'keywords': 'LLM, autonomous agents, planning, memory, tool use, task decomposition, self-reflection'}

In [35]:
class Paper(BaseModel): 
    """Information about papers mentioned"""
    title: str 
    author: Optional[str] 

class Info(BaseModel): 
    """Information to extract""" 
    papers: List[Paper] 

In [37]:
paper_extraction_function = [
    convert_pydantic_to_openai_function(Info)
]
extraction_model = model.bind(
    functions=paper_extraction_function, 
    function_call={"name":"Info"}
)
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="papers")

In [39]:
extraction_chain.invoke({"input": page_content})

[{'title': 'LLM Powered Autonomous Agents', 'author': 'Lilian Weng'}]

In [40]:
template = """A article will be passed to you. Extract from it all papers that are mentioned by this article. 

Do not extract the name of the article itself. If no papers are mentioned that's fine - you don't need to extract any! Just return an empty list.

Do not make up or guess ANY extra information. Only extract what exactly is in the text."""

prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{input}")
])

In [42]:
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="papers") 
extraction_chain.invoke({"input": page_content})

[{'title': 'Chain of thought (CoT; Wei et al. 2022)', 'author': None},
 {'title': 'Tree of Thoughts (Yao et al. 2023)', 'author': None},
 {'title': 'LLM+P (Liu et al. 2023)', 'author': None},
 {'title': 'ReAct (Yao et al. 2023)', 'author': None},
 {'title': 'Reflexion (Shinn & Labash 2023)', 'author': None},
 {'title': 'Chain of Hindsight (CoH; Liu et al. 2023)', 'author': None},
 {'title': 'Algorithm Distillation (AD; Laskin et al. 2023)', 'author': None}]

In [43]:
extraction_chain.invoke({"input": "hi"})

[]

In [44]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0)

In [45]:
splits = text_splitter.split_text(doc.page_content)

In [46]:
len(splits)

15

In [48]:
def flatten(matrix):
    flat_list = []
    for row in matrix:
        flat_list += row
    return flat_list
flatten([[1, 2], [3, 4]])

[1, 2, 3, 4]

In [49]:
print(splits[0])

LLM Powered Autonomous Agents | Lil'Log







































Lil'Log






















Posts




Archive




Search




Tags




FAQ




emojisearch.app









      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


 


Table of Contents



Agent System Overview

Component One: Planning

Task Decomposition

Self-Reflection


Component Two: Memory

Types of Memory

Maximum Inner Product Search (MIPS)


Component Three: Tool Use

Case Studies

Scientific Discovery Agent

Generative Agents Simulation

Proof-of-Concept Examples


Challenges

Citation

References





Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general probl

In [50]:
from langchain.schema.runnable import RunnableLambda

In [51]:
prep = RunnableLambda(
    lambda x: [{"input": doc} for doc in text_splitter.split_text(x)]
)

In [52]:
prep.invoke("hi")

[{'input': 'hi'}]

In [53]:
chain = prep | extraction_chain.map() | flatten

In [54]:
chain.invoke(doc.page_content)

[{'title': 'AutoGPT', 'author': None},
 {'title': 'GPT-Engineer', 'author': None},
 {'title': 'BabyAGI', 'author': None},
 {'title': 'Chain of thought', 'author': 'Wei et al. 2022'},
 {'title': 'Tree of Thoughts', 'author': 'Yao et al. 2023'},
 {'title': 'LLM+P', 'author': 'Liu et al. 2023'},
 {'title': 'ReAct', 'author': 'Yao et al. 2023'},
 {'title': 'Reflexion', 'author': 'Shinn & Labash 2023'},
 {'title': 'Chain of Hindsight', 'author': 'Liu et al. 2023'},
 {'title': 'Algorithm Distillation', 'author': 'Laskin et al. 2023'},
 {'title': 'Laskin et al. 2023', 'author': None},
 {'title': 'Miller 1956', 'author': None},
 {'title': 'Duan et al. 2017', 'author': None},
 {'title': 'Google Blog', 'author': None},
 {'title': 'MRKL (Karpas et al. 2022)', 'author': None},
 {'title': 'TALM (Tool Augmented Language Models; Parisi et al. 2022)',
  'author': None},
 {'title': 'Toolformer (Schick et al. 2023)', 'author': None},
 {'title': 'HuggingGPT (Shen et al. 2023)', 'author': None},
 {'title'