-
Notifications
You must be signed in to change notification settings - Fork 378
Open
Description
Apache Iceberg version
0.10.0 (latest release)
Please describe the bug 🐞
In v0.9.1 the following script successfully wrote to a local, docker-compose-based Lakekeeper REST catalog with MinIO S3 storage:
from pyiceberg.catalog.rest import RestCatalog
import pyarrow as pa

# Reproduction script: create a table through a REST catalog and append a
# small Arrow table to it. The append is the step that fails with SignError
# on 0.10.0.

catalog_name = "default"
catalog_properties = {
    "uri": "http://localhost:58080/iceberg/catalog",
    "warehouse": "playground",
    "credential": "<CLIENT_ID>:<CLIENT_SECRET>",
    "oauth2-server-uri": "http://localhost:58080/auth/realms/iceberg/protocol/openid-connect/token",
    "scope": "lakekeeper",
}

# NOTE(review): the report uses `namespace` and `table_name` without defining
# them; these are placeholder values so the script runs — substitute your own.
namespace = "default"
table_name = "cities"

catalog = RestCatalog(catalog_name, **catalog_properties)

df = pa.Table.from_pylist(
    [
        {"city": "Amsterdam", "lat": 52.371807, "long": 4.896029},
        {"city": "San Francisco", "lat": 37.773972, "long": -122.431297},
        {"city": "Drachten", "lat": 53.11254, "long": 6.0989},
        {"city": "Paris", "lat": 48.864716, "long": 2.349014},
    ],
)

catalog.create_namespace_if_not_exists(namespace)
tbl = catalog.create_table_if_not_exists(f"{namespace}.{table_name}", schema=df.schema)

# Inspect the token handed to the table's FileIO. This has to run after `tbl`
# exists, and uses .get() so that a missing token prints None instead of
# raising KeyError before the append below is reached.
print(f"FileIO token={tbl.io.properties.get('token')}")

tbl.append(df)
When the FileIO was created by:
iceberg-python/pyiceberg/catalog/__init__.py
Line 756 in 2bff5ef
return load_file_io({**self.properties, **properties}, location)
the `self.properties` of the Catalog already contained a token that was created upon initializing the catalog session.
After the AuthManager refactor, a token no longer appears in the Catalog `self.properties` that is passed to `FileIO.__init__`, causing the following exception on the `tbl.append` line:
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 118, in s3v4_rest_signer
raise SignError(f"Failed to sign request {response.status_code}: {signer_body}") from e
pyiceberg.exceptions.SignError: Failed to sign request 401: {'method': 'PUT', 'region': 'local-01', 'uri': 'http://minio:59000/playground/019994c5-5cba-7341-9a6b-ae48b68fc36b/019994c5-f647-7791-a57c-1fb7b493925b/metadata/snap-1472413044088081430-0-f3944734-cc0a-4006-8802-e340f5b13aa9.avro', 'headers': {'User-Agent': ['aiobotocore/2.24.2 md/Botocore#1.40.18 ua/2.1 os/macos#24.6.0 md/arch#x86_64 lang/python#3.13.3 md/pyimpl#CPython m/b,a,N,D cfg/retry-mode#legacy botocore/1.40.18'], 'Expect': ['100-continue']}}
Full traceback
Traceback (most recent call last):
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 115, in s3v4_rest_signer
response.raise_for_status()
~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/requests/models.py", line 1026, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: http://localhost:58080/iceberg/catalog/v1/signer/56d90850-9d15-11f0-b1ba-ef4b76728eaf/tabular-id/019994c5-f647-7791-a57c-1fb7b493925b/v1/aws/s3/sign
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/dmn58364/Code/adp-scripts/test_catalog_access.py", line 91, in <module>
tbl.append(df)
~~~~~~~~~~^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/__init__.py", line 1362, in append
tx.append(df=df, snapshot_properties=snapshot_properties, branch=branch)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/__init__.py", line 482, in append
with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/__init__.py", line 76, in __exit__
self.commit()
~~~~~~~~~~~^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/__init__.py", line 72, in commit
self._transaction._apply(*self._commit())
~~~~~~~~~~~~^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/snapshot.py", line 277, in _commit
with write_manifest_list(
~~~~~~~~~~~~~~~~~~~^
format_version=self._transaction.table_metadata.format_version,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...<4 lines>...
avro_compression=self._compression,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
) as writer:
^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/manifest.py", line 1231, in __exit__
self._writer.__exit__(exc_type, exc_value, traceback)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/avro/file.py", line 277, in __exit__
self.output_stream.close()
~~~~~~~~~~~~~~~~~~~~~~~~^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/spec.py", line 2206, in close
self.flush(force=True)
~~~~~~~~~~^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/spec.py", line 2069, in flush
if self._upload_chunk(final=force) is not False:
~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2449, in _upload_chunk
self.commit()
~~~~~~~~~~~^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2475, in commit
write_result = self._call_s3("put_object", **kw, **match)
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2309, in _call_s3
return self.fs.call_s3(method, self.s3_additional_kwargs, *kwarglist, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 118, in wrapper
return sync(self.loop, func, *args, **kwargs)
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 103, in sync
raise return_result
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 56, in _runner
result[0] = await coro
^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 371, in _call_s3
return await _error_wrapper(
^^^^^^^^^^^^^^^^^^^^^
method, kwargs=additional_kwargs, retries=self.retries
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 146, in _error_wrapper
raise err
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 114, in _error_wrapper
return await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/context.py", line 36, in wrapper
return await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/client.py", line 406, in _make_api_call
http, parsed_response = await self._make_request(
^^^^^^^^^^^^^^^^^^^^^^^^^
operation_model, request_dict, request_context
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/client.py", line 432, in _make_request
return await self._endpoint.make_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
operation_model, request_dict
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/endpoint.py", line 116, in _send_request
request = await self.create_request(request_dict, operation_model)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/endpoint.py", line 104, in create_request
await self._event_emitter.emit(
...<3 lines>...
)
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/hooks.py", line 68, in _emit
response = await resolve_awaitable(handler(**kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/_helpers.py", line 6, in resolve_awaitable
return await obj
^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/signers.py", line 26, in handler
return await self.sign(operation_name, request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/signers.py", line 49, in sign
await self._event_emitter.emit(
...<7 lines>...
)
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/hooks.py", line 68, in _emit
response = await resolve_awaitable(handler(**kwargs))
~~~~~~~^^^^^^^^^^
File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 118, in s3v4_rest_signer
raise SignError(f"Failed to sign request {response.status_code}: {signer_body}") from e
pyiceberg.exceptions.SignError: Failed to sign request 401: {'method': 'PUT', 'region': 'local-01', 'uri': 'http://minio:59000/playground/019994c5-5cba-7341-9a6b-ae48b68fc36b/019994c5-f647-7791-a57c-1fb7b493925b/metadata/snap-1472413044088081430-0-f3944734-cc0a-4006-8802-e340f5b13aa9.avro', 'headers': {'User-Agent': ['aiobotocore/2.24.2 md/Botocore#1.40.18 ua/2.1 os/macos#24.6.0 md/arch#x86_64 lang/python#3.13.3 md/pyimpl#CPython m/b,a,N,D cfg/retry-mode#legacy botocore/1.40.18'], 'Expect': ['100-continue']}}
I can work around this by adding a token to the `**properties` passed to `RestCatalog.__init__`, but I don't think this token would then get refreshed?
Willingness to contribute
- I can contribute a fix for this bug independently
- I would be willing to contribute a fix for this bug with guidance from the Iceberg community
- I cannot contribute a fix for this bug at this time
jim-ngoo
Metadata
Metadata
Assignees
Labels
No labels