Skip to content

UnicodeEncodeError in the third get_started tutorial #1992

@ker2xu

Description

@ker2xu

[Y] I have checked the documentation and related resources and couldn't resolve my bug.

Describe the bug
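Calling `dataset.upload()` in the third get_started tutorial raises a `UnicodeEncodeError`. `upload_packet` in `ragas/sdk.py` passes the JSON payload to `requests.post` as a `str` (`data=data_json_string`), so `http.client` encodes the request body as Latin-1 and fails on the character '\u2019' (right single quotation mark) contained in the generated testset. As the error message suggests, encoding the payload as UTF-8 bytes before sending it would presumably avoid the failure.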

Ragas version: 0.2.14
Python version: 3.12

Code to Reproduce
Share code to reproduce the issue

dataset.upload()

Error trace

UnicodeEncodeError Traceback (most recent call last)
Cell In[5], line 1
----> 1 dataset.upload()

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/ragas/testset/synthesizers/testset_schema.py:141, in Testset.upload(self, verbose)
139 def upload(self, verbose: bool = True) -> str:
140 packet = TestsetPacket(samples_original=self.samples, run_id=self.run_id)
--> 141 response = upload_packet(
142 path="/alignment/testset",
143 data_json_string=packet.model_dump_json(),
144 )
145 app_url = get_app_url()
147 testset_endpoint = f"{app_url}/dashboard/alignment/testset/{self.run_id}"

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/ragas/sdk.py:102, in upload_packet(path, data_json_string)
99 print(f" {data_json_string}")
100 print(section_delimiter)
--> 102 response = requests.post(
103 f"{base_url}/api/v1{path}",
104 data=data_json_string,
105 headers=headers,
106 timeout=(connection_timeout, read_timeout),
107 )
109 if enable_http_log:
110 try:

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/api.py:115, in post(url, data, json, **kwargs)
103 def post(url, data=None, json=None, **kwargs):
104 r"""Sends a POST request.
105
106 :param url: URL for the new :class:Request object.
(...) 112 :rtype: requests.Response
113 """
--> 115 return request("post", url, data=data, json=json, **kwargs)

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/api.py:59, in request(method, url, **kwargs)
55 # By using the 'with' statement we are sure the session is closed, thus we
56 # avoid leaving sockets open which can trigger a ResourceWarning in some
57 # cases, and look like a memory leak in others.
58 with sessions.Session() as session:
---> 59 return session.request(method=method, url=url, **kwargs)

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
664 timeout = TimeoutSauce(connect=timeout, read=timeout)
666 try:
--> 667 resp = conn.urlopen(
668 method=request.method,
669 url=url,
670 body=request.body,
671 headers=request.headers,
672 redirect=False,
673 assert_same_host=False,
674 preload_content=False,
675 decode_content=False,
676 retries=self.max_retries,
677 timeout=timeout,
678 chunked=chunked,
679 )
681 except (ProtocolError, OSError) as err:
682 raise ConnectionError(err, request=request)

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
712 self._prepare_proxy(conn)
714 # Make the request on the httplib connection object.
--> 715 httplib_response = self._make_request(
716 conn,
717 method,
718 url,
719 timeout=timeout_obj,
720 body=body,
721 headers=headers,
722 chunked=chunked,
723 )
725 # If we're going to release the connection in finally:, then
726 # the response doesn't need to know about the connection. Otherwise
727 # it will also try to release it and we'll have a double-release
728 # mess.
729 response_conn = conn if not release_conn else None

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connectionpool.py:416, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
414 conn.request_chunked(method, url, **httplib_request_kw)
415 else:
--> 416 conn.request(method, url, **httplib_request_kw)
418 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
419 # legitimately able to close the connection after sending a valid response.
420 # With this behaviour, the received response is still readable.
421 except BrokenPipeError:
422 # Python 3

File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connection.py:244, in HTTPConnection.request(self, method, url, body, headers)
242 if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
243 headers["User-Agent"] = _get_default_user_agent()
--> 244 super(HTTPConnection, self).request(method, url, body=body, headers=headers)

File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:1338, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1335 def request(self, method, url, body=None, headers={}, *,
1336 encode_chunked=False):
1337 """Send a complete request to the server."""
-> 1338 self._send_request(method, url, body, headers, encode_chunked)

File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:1383, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1379 self.putheader(hdr, value)
1380 if isinstance(body, str):
1381 # RFC 2616 Section 3.7.1 says that text default has a
1382 # default charset of iso-8859-1.
-> 1383 body = _encode(body, 'body')
1384 self.endheaders(body, encode_chunked=encode_chunked)

File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:166, in _encode(data, name)
164 return data.encode("latin-1")
165 except UnicodeEncodeError as err:
--> 166 raise UnicodeEncodeError(
167 err.encoding,
168 err.object,
169 err.start,
170 err.end,
171 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
172 "if you want to send it encoded in UTF-8." %
173 (name.title(), data[err.start:err.end], name)) from None

UnicodeEncodeError: 'latin-1' codec can't encode character '\u2019' in position 2654: Body ('’') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.

Expected behavior
A clear and concise description of what you expected to happen.

The example dataset should upload without errors.
At a minimum, the examples in the get_started docs should run as written.

Additional context
Add any other context about the problem here.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions