From 3830d3d6e3be3607a5d7f9b79e878d3eaf0cae92 Mon Sep 17 00:00:00 2001 From: hantmac Date: Mon, 6 May 2024 22:34:47 +0800 Subject: [PATCH 1/5] fix: encoding uploaded stage file as utf8 --- databend_py/uploader.py | 2 +- tests/test_client.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/databend_py/uploader.py b/databend_py/uploader.py index 14411b4..b0b80d0 100644 --- a/databend_py/uploader.py +++ b/databend_py/uploader.py @@ -97,7 +97,7 @@ def _upload_to_presigned_url(self, presigned_url, headers, data): raise Exception('data is not bytes, File, or a list: %s' % type(data)) start_time = time.time() try: - resp = requests.put(presigned_url, headers=headers, data=buf) + resp = requests.put(presigned_url, headers=headers, data=buf.encode('utf-8')) resp.raise_for_status() finally: if self._debug: diff --git a/tests/test_client.py b/tests/test_client.py index 93fc40f..267ba05 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -198,6 +198,13 @@ def test_null_to_none(self): _, data = client.execute("select NULL as test") self.assertIsNone(data[0][0]) + def test_special_chars(self): + client = Client.from_url(self.databend_url) + client.execute("create or replace table test_special_chars (x string)") + client.execute("INSERT INTO test_special_chars (x) VALUES", [('ó')]) + _, data = client.execute("select * from test_special_chars") + self.assertEqual(data, [('ó')]) + def test_set_query_id_header(self): os.environ["ADDITIONAL_HEADERS"] = "X-DATABENDCLOUD-TENANT=TENANT,X-DATABENDCLOUD-WAREHOUSE=WAREHOUSE" client = Client.from_url(self.databend_url) From ed7438d479dce7067c6449532dda5c90e816cae6 Mon Sep 17 00:00:00 2001 From: hantmac Date: Mon, 6 May 2024 22:44:17 +0800 Subject: [PATCH 2/5] fix tests --- databend_py/uploader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/databend_py/uploader.py b/databend_py/uploader.py index b0b80d0..711f648 100644 --- a/databend_py/uploader.py +++ b/databend_py/uploader.py @@ -90,14 +90,14 @@ def _upload_to_presigned_url(self, presigned_url, headers, data): buf_size = len(buf) data_len = 1 elif isinstance(data, list): - buf = self._serialize_data(data, self._compress) + buf = self._serialize_data(data, self._compress).encode('utf-8') buf_size = len(buf) data_len = len(data) else: raise Exception('data is not bytes, File, or a list: %s' % type(data)) start_time = time.time() try: - resp = requests.put(presigned_url, headers=headers, data=buf.encode('utf-8')) + resp = requests.put(presigned_url, headers=headers, data=buf) resp.raise_for_status() finally: if self._debug: From de0c01d22bc397546391afec882e0fc3c5233576 Mon Sep 17 00:00:00 2001 From: hantmac Date: Mon, 6 May 2024 22:47:33 +0800 Subject: [PATCH 3/5] fix --- databend_py/uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databend_py/uploader.py b/databend_py/uploader.py index 711f648..10cb7c9 100644 --- a/databend_py/uploader.py +++ b/databend_py/uploader.py @@ -76,7 +76,7 @@ def _serialize_data(self, data, compress): if compress: buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode="wb") as gzwriter: - gzwriter.write(output.encode('utf-8')) + gzwriter.write(output) output = buf.getvalue() if self._debug: print('upload:_serialize_data %s' % (time.time() - start_time)) From e54fee26232117abe17e1f0bc7ff7db1fb24b9a4 Mon Sep 17 00:00:00 2001 From: hantmac Date: Mon, 6 May 2024 22:50:57 +0800 Subject: [PATCH 4/5] fix --- databend_py/uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databend_py/uploader.py b/databend_py/uploader.py index 10cb7c9..653cda6 100644 --- a/databend_py/uploader.py +++ b/databend_py/uploader.py @@ -72,7 +72,7 @@ def _serialize_data(self, data, compress): buf = io.StringIO() csvwriter = csv.writer(buf, delimiter=',', quoting=csv.QUOTE_MINIMAL) csvwriter.writerows(data) - output = buf.getvalue() + output = buf.getvalue().encode('utf-8') if compress: buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode="wb") as gzwriter: From b0548486e46cf7fe6e999c236ea1d788656396d8 Mon Sep 17 00:00:00 2001 From: hantmac Date: Mon, 6 May 2024 22:51:52 +0800 Subject: [PATCH 5/5] fix --- databend_py/uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databend_py/uploader.py b/databend_py/uploader.py index 653cda6..1e98956 100644 --- a/databend_py/uploader.py +++ b/databend_py/uploader.py @@ -90,7 +90,7 @@ def _upload_to_presigned_url(self, presigned_url, headers, data): buf_size = len(buf) data_len = 1 elif isinstance(data, list): - buf = self._serialize_data(data, self._compress).encode('utf-8') + buf = self._serialize_data(data, self._compress) buf_size = len(buf) data_len = len(data) else: