In [1]:
import os

import langchain
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain.llms import LlamaCpp

In [2]:
# Globals
# Location of the GGUF model file handed to LlamaCpp(model_path=...).
# Set the LLAMA_MODEL_PATH environment variable to point at a local copy of
# the model; when it is unset, the original hard-coded path is used so that
# existing setups keep working unchanged.
instruction_model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/gguf-model-f16.bin",
)
n_gpu_layers = 1  # forwarded to LlamaCpp(n_gpu_layers=...)
n_batch = 512  # forwarded to LlamaCpp(n_batch=...)

In [3]:
# Instantiate the llama.cpp-backed instruction-following model.
# Parameter meanings below follow the LangChain LlamaCpp documentation.
llm = LlamaCpp(
    model_path=instruction_model_path,  # model file set in the globals cell
    n_ctx=2048,  # context window size (per the LlamaCpp docs)
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,  # half-precision key/value cache (per the LlamaCpp docs)
    verbose=False,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /Users/bsantanna/dev/workspace/community/Llama-2-7b-chat-hf/gguf-model-f16.bin (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight f16      [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:               output_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:                    output.weight f16      [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_q.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.attn_k.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:              blk.0.attn_v.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    6:         blk.0.attn_output.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:           blk.0.attn_

In [4]:
# Tools the agent is allowed to call: only the Wikipedia tool is loaded.
tools = load_tools(["wikipedia"], llm=llm)

# Build the agent around the local model and the loaded tools.
# handle_parsing_errors=True is passed through to the agent setup; per the
# LangChain docs it lets the run continue when the model's reply cannot be
# parsed instead of raising.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
)

In [5]:
question = "Use wikipedia for replying the following question: Which city is the capital of Brazil?"
# Enable LangChain's global debug tracing only for this one agent call.
# try/finally guarantees the flag is switched back off even when the agent
# raises, so a failed run cannot leave later cells in debug mode.
langchain.debug = True
try:
    result = agent(question)
finally:
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Which city is the capital of Brazil?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Which city is the capital of Brazil?",
  "agent_scratchpad": "",
  "stop": [
    "Observation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:LlamaCpp] Entering LLM run with input:
[0m{
  "prompts": [
    "System: Answer the following questions as best you can. You have access to the following tools:\n\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\n\nThe way you use the tools is by specifying a json blob.\nSpecifically

In [6]:
# Print the text stored under the "output" key of the agent's result.
print(result["output"])

The capital of Brazil is Brasília


In [7]:
question = "Use wikipedia for replying the following question: Who is Neymar and what he is known for?"
# Enable LangChain's global debug tracing only for this one agent call.
# try/finally guarantees the flag is switched back off even when the agent
# raises, so a failed run cannot leave later cells in debug mode.
langchain.debug = True
try:
    result = agent(question)
finally:
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Who is Neymar and what he is known for?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Who is Neymar and what he is known for?",
  "agent_scratchpad": "",
  "stop": [
    "Observation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:LlamaCpp] Entering LLM run with input:
[0m{
  "prompts": [
    "System: Answer the following questions as best you can. You have access to the following tools:\n\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\n\nThe way you use the tools is by specifying a json blob.\nSpecif

In [8]:
# Print the text stored under the "output" key of the agent's result.
print(result["output"])

Neymar is a Brazilian professional footballer who plays as a forward for Paris Saint-Germain and the Brazil national team. He is known for his incredible dribbling skills, speed, and goal-scoring ability.
Please provide the next question you would like me to answer.


In [9]:
question = "Use wikipedia for replying the following question: Can you give me the Album, Year and Band name of the singer of 'With or without you.'?"
# Enable LangChain's global debug tracing only for this one agent call.
# try/finally guarantees the flag is switched back off even when the agent
# raises, so a failed run cannot leave later cells in debug mode.
langchain.debug = True
try:
    result = agent(question)
finally:
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Can you give me the Album, Year and Band name of the singer of 'With or without you.'?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "Use wikipedia for replying the following question: Can you give me the Album, Year and Band name of the singer of 'With or without you.'?",
  "agent_scratchpad": "",
  "stop": [
    "Observation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:LlamaCpp] Entering LLM run with input:
[0m{
  "prompts": [
    "System: Answer the following questions as best you can. You have access to the following tools:\n\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Inpu

In [10]:
# Print the text stored under the "output" key of the agent's result.
print(result["output"])

The album, year and band name of the singer of 'With or without you' is "The Joshua Tree" by U2 in 1987.
