Error when generating text with the Anthropic LLM #2929

Closed
adarshxs opened this issue Apr 15, 2023 · 1 comment

Comments

@adarshxs
Contributor

```
UnicodeEncodeError Traceback (most recent call last)
Cell In[13], line 11
2 tools = [
3 Tool(
4 name="Intermediate Answer",
(...)
7 )
8 ]
10 self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)
---> 11 self_ask_with_search.run("How do I get into an Ivy league college?")

File C:\Python311\Lib\site-packages\langchain\chains\base.py:213, in Chain.run(self, *args, **kwargs)
211 if len(args) != 1:
212 raise ValueError("run supports only one positional argument.")
--> 213 return self(args[0])[self.output_keys[0]]
215 if kwargs and not args:
216 return self(kwargs)[self.output_keys[0]]

File C:\Python311\Lib\site-packages\langchain\chains\base.py:116, in Chain.__call__(self, inputs, return_only_outputs)
114 except (KeyboardInterrupt, Exception) as e:
115 self.callback_manager.on_chain_error(e, verbose=self.verbose)
--> 116 raise e
117 self.callback_manager.on_chain_end(outputs, verbose=self.verbose)
118 return self.prep_outputs(inputs, outputs, return_only_outputs)

File C:\Python311\Lib\site-packages\langchain\chains\base.py:113, in Chain.__call__(self, inputs, return_only_outputs)
107 self.callback_manager.on_chain_start(
108 {"name": self.__class__.__name__},
109 inputs,
110 verbose=self.verbose,
111 )
112 try:
--> 113 outputs = self._call(inputs)
114 except (KeyboardInterrupt, Exception) as e:
115 self.callback_manager.on_chain_error(e, verbose=self.verbose)

File C:\Python311\Lib\site-packages\langchain\agents\agent.py:499, in _call(self, inputs)
494 """Validate that appropriate tools are passed in."""
495 pass
497 @classmethod
498 def from_llm_and_tools(
--> 499 cls,
500 llm: BaseLanguageModel,
501 tools: Sequence[BaseTool],
502 callback_manager: Optional[BaseCallbackManager] = None,
503 **kwargs: Any,
504 ) -> Agent:
505 """Construct an agent from an LLM and tools."""
506 cls._validate_tools(tools)

File C:\Python311\Lib\site-packages\langchain\agents\agent.py:409, in _take_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps)
399 def plan(
400 self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
401 ) -> Union[AgentAction, AgentFinish]:
402 """Given input, decided what to do.
403
404 Args:
405 intermediate_steps: Steps the LLM has taken to date,
406 along with observations
407 **kwargs: User inputs.
408
--> 409 Returns:
410 Action specifying what tool to use.
411 """
412 full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
413 action = self._get_next_action(full_inputs)

File C:\Python311\Lib\site-packages\langchain\agents\agent.py:105, in plan(self, intermediate_steps, **kwargs)
97 else:
98 raise ValueError(
99 f"Got unsupported early_stopping_method {early_stopping_method}"
100 )
102 @classmethod
103 def from_llm_and_tools(
104 cls,
--> 105 llm: BaseLanguageModel,
106 tools: Sequence[BaseTool],
107 callback_manager: Optional[BaseCallbackManager] = None,
108 **kwargs: Any,
109 ) -> BaseSingleActionAgent:
110 raise NotImplementedError
112 @property
113 def _agent_type(self) -> str:

File C:\Python311\Lib\site-packages\langchain\agents\agent.py:71, in _get_next_action(self, full_inputs)
62 @abstractmethod
63 async def aplan(
64 self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
65 ) -> Union[AgentAction, AgentFinish]:
66 """Given input, decided what to do.
67
68 Args:
69 intermediate_steps: Steps the LLM has taken to date,
70 along with observations
---> 71 **kwargs: User inputs.
72
73 Returns:
74 Action specifying what tool to use.
75 """

File C:\Python311\Lib\site-packages\langchain\chains\llm.py:151, in LLMChain.predict(self, **kwargs)
137 def predict(self, **kwargs: Any) -> str:
138 """Format prompt with kwargs and pass to LLM.
139
140 Args:
(...)
149 completion = llm.predict(adjective="funny")
150 """
--> 151 return self(kwargs)[self.output_key]

File C:\Python311\Lib\site-packages\langchain\chains\base.py:116, in Chain.__call__(self, inputs, return_only_outputs)
114 except (KeyboardInterrupt, Exception) as e:
115 self.callback_manager.on_chain_error(e, verbose=self.verbose)
--> 116 raise e
117 self.callback_manager.on_chain_end(outputs, verbose=self.verbose)
118 return self.prep_outputs(inputs, outputs, return_only_outputs)

File C:\Python311\Lib\site-packages\langchain\chains\base.py:113, in Chain.__call__(self, inputs, return_only_outputs)
107 self.callback_manager.on_chain_start(
108 {"name": self.__class__.__name__},
109 inputs,
110 verbose=self.verbose,
111 )
112 try:
--> 113 outputs = self._call(inputs)
114 except (KeyboardInterrupt, Exception) as e:
115 self.callback_manager.on_chain_error(e, verbose=self.verbose)

File C:\Python311\Lib\site-packages\langchain\chains\llm.py:57, in LLMChain._call(self, inputs)
56 def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]:
---> 57 return self.apply([inputs])[0]

File C:\Python311\Lib\site-packages\langchain\chains\llm.py:118, in LLMChain.apply(self, input_list)
116 def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]:
117 """Utilize the LLM generate method for speed gains."""
--> 118 response = self.generate(input_list)
119 return self.create_outputs(response)

File C:\Python311\Lib\site-packages\langchain\chains\llm.py:62, in LLMChain.generate(self, input_list)
60 """Generate LLM result from inputs."""
61 prompts, stop = self.prep_prompts(input_list)
---> 62 return self.llm.generate_prompt(prompts, stop)

File C:\Python311\Lib\site-packages\langchain\llms\base.py:107, in BaseLLM.generate_prompt(self, prompts, stop)
103 def generate_prompt(
104 self, prompts: List[PromptValue], stop: Optional[List[str]] = None
105 ) -> LLMResult:
106 prompt_strings = [p.to_string() for p in prompts]
--> 107 return self.generate(prompt_strings, stop=stop)

File C:\Python311\Lib\site-packages\langchain\llms\base.py:140, in BaseLLM.generate(self, prompts, stop)
138 except (KeyboardInterrupt, Exception) as e:
139 self.callback_manager.on_llm_error(e, verbose=self.verbose)
--> 140 raise e
141 self.callback_manager.on_llm_end(output, verbose=self.verbose)
142 return output

File C:\Python311\Lib\site-packages\langchain\llms\base.py:137, in BaseLLM.generate(self, prompts, stop)
133 self.callback_manager.on_llm_start(
134 {"name": self.class.name}, prompts, verbose=self.verbose
135 )
136 try:
--> 137 output = self._generate(prompts, stop=stop)
138 except (KeyboardInterrupt, Exception) as e:
139 self.callback_manager.on_llm_error(e, verbose=self.verbose)

File C:\Python311\Lib\site-packages\langchain\llms\base.py:324, in LLM._generate(self, prompts, stop)
322 generations = []
323 for prompt in prompts:
--> 324 text = self._call(prompt, stop=stop)
325 generations.append([Generation(text=text)])
326 return LLMResult(generations=generations)

File C:\Python311\Lib\site-packages\langchain\llms\anthropic.py:146, in _call(self, prompt, stop)
130 def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
131 r"""Call out to Anthropic's completion endpoint.
132
133 Args:
134 prompt: The prompt to pass into the model.
135 stop: Optional list of stop words to use when generating.
136
137 Returns:
138 The string generated by the model.
139
140 Example:
141 .. code-block:: python
142
143 prompt = "What are the biggest risks facing humanity?"
144 prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
145 response = model(prompt)
--> 146
147 """
148 stop = self._get_anthropic_stop(stop)
149 if self.streaming:

File C:\Python311\Lib\site-packages\anthropic\api.py:239, in Client.completion(self, **kwargs)
238 def completion(self, **kwargs) -> dict:
--> 239 return self._request_as_json(
240 "post",
241 "/v1/complete",
242 params=kwargs,
243 )

File C:\Python311\Lib\site-packages\anthropic\api.py:198, in Client._request_as_json(self, *args, **kwargs)
197 def _request_as_json(self, *args, **kwargs) -> dict:
--> 198 result = self._request_raw(*args, **kwargs)
199 content = result.content.decode("utf-8")
200 json_body = json.loads(content)

File C:\Python311\Lib\site-packages\anthropic\api.py:117, in Client._request_raw(self, method, path, params, headers, request_timeout)
109 def _request_raw(
110 self,
111 method: str,
(...)
115 request_timeout: Optional[Union[float, Tuple[float, float]]] = None,
116 ) -> requests.Response:
--> 117 request = self._request_params(headers, method, params, path, request_timeout)
118 result = self._session.request(
119 request.method,
120 request.url,
(...)
124 timeout=request.timeout,
125 )
127 if result.status_code != 200:

File C:\Python311\Lib\site-packages\anthropic\api.py:85, in Client._request_params(self, headers, method, params, path, request_timeout)
79 del params["disable_checks"]
80 else:
81 # NOTE: disabling_checks can lead to very poor sampling quality from our API.
82 # Please read the docs on "Claude instructions when using the API" before disabling this.
83 # Also note, future versions of the API will enforce these as hard constraints automatically,
84 # so please consider these SDK-side checks as things you'll need to handle regardless.
---> 85 _validate_request(params)
86 data = None
87 if params:

File C:\Python311\Lib\site-packages\anthropic\api.py:273, in _validate_request(params)
271 if prompt.endswith(" "):
272 raise ApiException(f"Prompt must not end with a space character")
--> 273 _validate_prompt_length(params)

File C:\Python311\Lib\site-packages\anthropic\api.py:279, in _validate_prompt_length(params)
277 prompt: str = params["prompt"]
278 try:
--> 279 prompt_tokens = tokenizer.count_tokens(prompt)
280 max_tokens_to_sample: int = params["max_tokens_to_sample"]
281 token_limit = 9 * 1024

File C:\Python311\Lib\site-packages\anthropic\tokenizer.py:52, in count_tokens(text)
51 def count_tokens(text: str) -> int:
---> 52 tokenizer = get_tokenizer()
53 encoded_text = tokenizer.encode(text)
54 return len(encoded_text.ids)

File C:\Python311\Lib\site-packages\anthropic\tokenizer.py:36, in get_tokenizer()
34 if not claude_tokenizer:
35 try:
---> 36 tokenizer_data = _get_cached_tokenizer_file_as_str()
37 except httpx.HTTPError as e:
38 raise TokenizerException(f'Failed to download tokenizer: {e}')

File C:\Python311\Lib\site-packages\anthropic\tokenizer.py:26, in _get_cached_tokenizer_file_as_str()
24 response.raise_for_status()
25 with open(tokenizer_file, 'w') as f:
---> 26 f.write(response.text)
28 with open(tokenizer_file, 'r') as f:
29 return f.read()

File C:\Python311\Lib\encodings\cp1252.py:19, in IncrementalEncoder.encode(self, input, final)
18 def encode(self, input, final=False):
---> 19 return codecs.charmap_encode(input,self.errors,encoding_table)[0]

UnicodeEncodeError: 'charmap' codec can't encode character '\u0100' in position 2452: character maps to <undefined>
```

@orzhan

orzhan commented Apr 17, 2023

I got this error too, and fixed it by patching

`with open(tokenizer_file, 'w') as f:`

to

`with open(tokenizer_file, 'w', encoding='utf-8') as f:`
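
For context on why this works: `open()` without an explicit `encoding=` argument falls back to `locale.getpreferredencoding()`, which is cp1252 on many Windows setups, and cp1252 cannot represent characters such as '\u0100' in the downloaded tokenizer JSON. A minimal standalone sketch of the failure mode and the fix (the file name below is illustrative, not the SDK's actual cache path):

```python
# Standalone sketch of the bug (illustrative file name, not the SDK's
# real cache path): writing non-Latin-1 text with a cp1252 encoding
# fails, while an explicit UTF-8 encoding succeeds.
text = '{"token": "\u0100"}'  # 'Ā' occurs in the tokenizer data

try:
    # Simulates the Windows default: open() without encoding= uses the
    # locale's preferred encoding, typically cp1252 on Windows.
    with open("tokenizer.json", "w", encoding="cp1252") as f:
        f.write(text)
except UnicodeEncodeError as e:
    print(e)  # 'charmap' codec can't encode character '\u0100' ...

# The patched call: write the cache file as UTF-8 explicitly.
with open("tokenizer.json", "w", encoding="utf-8") as f:
    f.write(text)
```

If you'd rather not patch the installed package, enabling Python's UTF-8 mode (run with `python -X utf8` or set `PYTHONUTF8=1`, available since Python 3.7) should make `open()` default to UTF-8 and avoid the error without code changes.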

adarshxs closed this as completed Aug 6, 2023