In [1]:
!pip install pyiceberg

Collecting pyiceberg
  Downloading pyiceberg-0.9.0.tar.gz (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.0/612.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting cachetools<6.0.0,>=5.5.0 (from pyiceberg)
  Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9.0.0,>=7.1.1 (from pyiceberg)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting fsspec>=2023.1.0 (from pyiceberg)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting mmh3<6.0.0,>=4.0.0 (from pyiceberg)
  Downloading mmh3-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (16 kB)
Collecting pydantic!=2.4.0,!=2.4.1,<3.0,>=2.0 (from pyiceberg)
  Downloading pydantic-2.11.3-py3-none-any.whl.metadata (65 kB)
[2

In [2]:
import logging
import http.client as http_client

# Verbose HTTP diagnostics: configure the root logger, raise the root and
# urllib3 loggers to DEBUG, and have http.client echo each request/response.
logging.basicConfig()
for logger_name in (None, "urllib3"):  # None selects the root logger
    logging.getLogger(logger_name).setLevel(logging.DEBUG)
http_client.HTTPConnection.debuglevel = 1

In [3]:
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, IntegerType, StringType

# Connect to the Iceberg REST catalog at http://iceberg-rest:8181, with S3
# file IO pointed at the MinIO endpoint.
# NOTE(review): the access key and secret are hard-coded; presumably these are
# local MinIO development defaults -- move them to environment variables
# before sharing this notebook.
rest_catalog = load_catalog(
    name="rest",
    **{
        "type": "rest",
        "uri": "http://iceberg-rest:8181",
        "s3.endpoint": "http://minio:9000",
        "s3.region": "us-east-1",
        "s3.path-style-access": "true",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
    },
)

# Two-column schema: a required integer `id` and an optional string `name`.
schema = Schema(
    NestedField(1, "id", IntegerType(), required=True),
    NestedField(2, "name", StringType(), required=False),
)

# Create the namespace only when it is missing. Unlike a blanket
# `except Exception`, this lets real failures (connection, auth, server
# errors) surface instead of being reported as "may already exist".
rest_catalog.create_namespace_if_not_exists("default")

# Create the table only when it is missing, so the cell can be re-run.
# NOTE(review): in the recorded run the server answered this request with
# 500 NoSuchBucketException -- presumably the bucket backing the catalog's
# warehouse had not been created in MinIO yet; create it before running.
identifier = "default.sample_table"
rest_catalog.create_table_if_not_exists(identifier=identifier, schema=schema)


DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): iceberg-rest:8181
DEBUG:urllib3.connectionpool:http://iceberg-rest:8181 "GET /v1/config HTTP/1.1" 200 282
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): iceberg-rest:8181
DEBUG:urllib3.connectionpool:http://iceberg-rest:8181 "POST /v1/namespaces HTTP/1.1" 200 90


send: b'GET /v1/config HTTP/1.1\r\nHost: iceberg-rest:8181\r\nUser-Agent: PyIceberg/0.9.0\r\nAccept-Encoding: gzip, deflate, br, zstd\r\nAccept: */*\r\nConnection: keep-alive\r\nContent-type: application/json\r\nX-Client-Version: 0.14.1\r\nX-Iceberg-Access-Delegation: vended-credentials\r\n\r\n'
reply: 'HTTP/1.1 200 OK\r\n'
header: Date: Thu, 24 Apr 2025 08:34:26 GMT
header: Content-Type: application/json
header: Vary: Accept-Encoding
header: Content-Encoding: gzip
header: Content-Length: 282
header: Server: Jetty(11.0.25)
send: b'POST /v1/namespaces HTTP/1.1\r\nHost: iceberg-rest:8181\r\nUser-Agent: PyIceberg/0.9.0\r\nAccept-Encoding: gzip, deflate, br, zstd\r\nAccept: */*\r\nConnection: keep-alive\r\nContent-type: application/json\r\nX-Client-Version: 0.14.1\r\nX-Iceberg-Access-Delegation: vended-credentials\r\nContent-Length: 44\r\n\r\n'
send: b'{"namespace": ["default"], "properties": {}}'
reply: 'HTTP/1.1 200 OK\r\n'
header: Date: Thu, 24 Apr 2025 08:34:26 GMT
header: Content-Type

DEBUG:urllib3.connectionpool:http://iceberg-rest:8181 "POST /v1/namespaces/default/tables HTTP/1.1" 500 10422


reply: 'HTTP/1.1 500 Server Error\r\n'
header: Date: Thu, 24 Apr 2025 08:34:27 GMT
header: Content-Type: application/json
header: Content-Length: 10422
header: Server: Jetty(11.0.25)


ServerError: NoSuchBucketException: The specified bucket does not exist (Service: S3, Status Code: 404, Request ID: 18393425BF4FC60E, Extended Request ID: dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8)

In [53]:
# Fetch the namespaces known to the REST catalog and echo them.
namespaces = rest_catalog.list_namespaces()
print(f"Namespaces: {namespaces}")

Namespaces: [('default',)]


In [51]:
# Fetch the tables registered under the `default` namespace and echo them.
tables = rest_catalog.list_tables("default")
print(f"Tables in 'default': {tables}")

Tables in 'default': []


In [55]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, date_add, expr

from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import FixedType, NestedField, UUIDType
# Build (or reuse) a Spark session with a single shuffle partition and a
# default parallelism of one.
spark_builder = SparkSession.builder
for conf_key, conf_value in (
    ("spark.sql.shuffle.partitions", "1"),
    ("spark.default.parallelism", "1"),
):
    spark_builder = spark_builder.config(conf_key, conf_value)
spark = spark_builder.getOrCreate()

In [56]:
# Connection properties for the REST catalog and the S3-compatible store it
# reads and writes through.
# NOTE(review): credentials are hard-coded here -- confirm they are
# development-only values.
rest_properties = {
    "type": "rest",
    "uri": "http://rest:8181",
    "s3.endpoint": "http://minio:9000",
    "s3.access-key-id": "admin",
    "s3.secret-access-key": "password",
}

# Catalog handles keyed by catalog name.
catalogs = {"rest": load_catalog("rest", **rest_properties)}

In [57]:
# Namespace-qualified table name, shared by the PyIceberg calls and the Spark
# INSERT so the two cannot drift apart.
UUID_FIXED_TABLE = "default.test_uuid_and_fixed_unpartitioned"

# Loop-invariant setup, done once rather than per catalog.
# The statement is unqualified, so it runs against Spark's current catalog.
spark.sql("CREATE DATABASE IF NOT EXISTS default")

# Optional UUID column plus an optional 25-byte fixed-length binary column.
schema = Schema(
    NestedField(field_id=1, name="uuid_col", field_type=UUIDType(), required=False),
    NestedField(field_id=2, name="fixed_col", field_type=FixedType(25), required=False),
)

for catalog_name, catalog in catalogs.items():
    # Re-running the cell must neither fail on an existing table nor append
    # the seed rows a second time, so an existing table is left untouched.
    if catalog.table_exists(UUID_FIXED_TABLE):
        print(f"{catalog_name}.{UUID_FIXED_TABLE} already exists; skipping create and seed")
        continue

    catalog.create_table(identifier=UUID_FIXED_TABLE, schema=schema)

    # Seed five rows; every binary literal is 25 characters long to match
    # FixedType(25).
    # NOTE(review): assumes the Spark session has a catalog registered under
    # `catalog_name` -- that configuration is not visible here; confirm.
    spark.sql(
        f"""
        INSERT INTO {catalog_name}.{UUID_FIXED_TABLE} VALUES
        ('102cb62f-e6f8-4eb0-9973-d9b012ff0967', CAST('1234567890123456789012345' AS BINARY)),
        ('ec33e4b2-a834-4cc3-8c4a-a1d3bfc2f226', CAST('1231231231231231231231231' AS BINARY)),
        ('639cccce-c9d2-494a-a78c-278ab234f024', CAST('12345678901234567ass12345' AS BINARY)),
        ('c1b0d8e0-0b0e-4b1e-9b0a-0e0b0d0c0a0b', CAST('asdasasdads12312312312111' AS BINARY)),
        ('923dae77-83d6-47cd-b4b0-d383e64ee57e', CAST('qweeqwwqq1231231231231111' AS BINARY));
        """
    )


ServerError: NoSuchBucketException: The specified bucket does not exist (Service: S3, Status Code: 404, Request ID: 1839321DA7B548FB, Extended Request ID: dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8)