In [1]:
import os

from pyspark.sql import SparkSession

# Read the S3 credentials from the environment (KeyError if either is unset).
accessKey = os.environ['AWS_ACCESS_KEY_ID']
secretKey = os.environ['AWS_SECRET_ACCESS_KEY']

# Session options, applied to the builder in the order listed below.
_session_options = {
    # Iceberg runtime + AWS bundle pulled in as Spark packages
    "spark.jars.packages": (
        "org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.1,"
        "org.apache.iceberg:iceberg-aws-bundle:1.10.1"
    ),

    # Iceberg catalog
    "spark.sql.catalog.hiverest": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.hiverest.type": "rest",
    "spark.sql.catalog.hiverest.uri": "http://hive-metastore:9084/iceberg",
    "spark.sql.catalog.hiverest.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
    "spark.sql.catalog.hiverest.warehouse": "s3a://admin-bucket/iceberg/warehouse",

    # Required for MinIO with the Iceberg AWS SDK
    "spark.sql.catalog.hiverest.s3.endpoint": "http://minio-1:9000",
    "spark.sql.catalog.hiverest.s3.path-style-access": "true",
    "spark.sql.catalog.hiverest.s3.access-key-id": accessKey,
    "spark.sql.catalog.hiverest.s3.secret-access-key": secretKey,

    # Unqualified table names resolve against the "hiverest" catalog
    "spark.sql.defaultCatalog": "hiverest",

    "spark.sql.extensions":
        "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
}

_builder = SparkSession.builder.appName("Jupyter").master("local[*]")
for _option, _setting in _session_options.items():
    _builder = _builder.config(_option, _setting)

spark = _builder.getOrCreate()


In [2]:
# Load the IPython extension named `sql`, which provides the %sql / %%sql magics.
# NOTE(review): presumably JupySQL or ipython-sql — confirm which package is installed.
%load_ext sql

# Hand the `spark` session to the SQL magic.
# NOTE(review): presumably this makes it the connection used by later %%sql cells — verify.
%sql spark

In [3]:
%%sql
SELECT *
FROM cards_db.card_transaction_t;

Field 1,Field 2,Field 3,Field 4,Field 5,Field 6,Field 7
9039983a-480d-943d-d738-2df69d44d09d,5151-3898-2029-0854,merchant-113,254.24661640734,UGX,FI,2026-02-05 21:12:39.384000
62a04a61-032a-5bc1-541e-f41aef491535,6500-3011-1590-4175,merchant-83,191.15589802475196,TND,SE,2026-02-05 21:12:39.564000


In [6]:
# Query the table through Spark SQL using the existing `spark` session.
# The name is not catalog-qualified, so it resolves against the session's
# default catalog ("hiverest", set via spark.sql.defaultCatalog).
# No import is needed in this cell: SparkSession is already imported where the
# session is built, and nothing here references the class itself.
df = spark.sql("SELECT * FROM cards_db.card_transaction_t")
df.show()

+--------------------+--------------------+------------+------------------+--------+-------+--------------------+
|       transactionId|          cardNumber|  merchantId|            amount|currency|country|           timestamp|
+--------------------+--------------------+------------+------------------+--------+-------+--------------------+
|ad4852a1-6246-059...| 2131-7976-1533-7492|merchant-121| 45.61248473897399|     MGF|     PT|2026-02-05 21:03:...|
|b6198dac-6a35-aa8...| 2720-2936-3751-6138|merchant-177|152.49878953405468|     RSD|     NO|2026-02-05 21:03:...|
|b2ae2879-a0bb-1ef...| 5421-2645-9305-3946|merchant-109| 212.7871962942436|     VED|     SE|2026-02-05 21:03:...|
|e429f2ef-21cc-a57...| 6579-1109-7182-6345|merchant-175| 30.46394745935917|     DKK|     AT|2026-02-05 21:03:...|
|a4a5f914-aaac-359...| 5038-3960-2730-9698| merchant-38|  75.0937743277961|     COP|     AT|2026-02-05 21:03:...|
|fd1e6d59-9907-130...| 5018-7838-8928-9495|merchant-185|182.87170216051524|     KZT|    

In [7]:
# Load the table through the DataFrameReader, naming the catalog ("hiverest")
# explicitly in the identifier, then print a preview of the rows.
_reader = spark.read
df = _reader.table("hiverest.cards_db.card_transaction_t")
df.show()

+--------------------+-------------------+------------+------------------+--------+-------+--------------------+
|       transactionId|         cardNumber|  merchantId|            amount|currency|country|           timestamp|
+--------------------+-------------------+------------+------------------+--------+-------+--------------------+
|6b653e0e-f54a-492...|5430-6252-8206-8119|merchant-140|18.612505889935804|     SLE|     DK|2026-02-05 21:02:...|
|a14e6932-138c-be7...|5018-1360-4419-1153|merchant-156| 59.18014141303245|     NGN|     LI|2026-02-05 21:02:...|
|9c45d4f1-811d-c9e...|5199-4145-8215-8998|merchant-101|167.12650083551367|     IEP|     AT|2026-02-05 21:02:...|
|a9fa2a39-e2c5-8d6...|6390-5764-9094-0544|merchant-155|148.69017651795275|     ROL|     LI|2026-02-05 21:02:...|
|63b88eaa-5f5c-a6a...|5267-6917-9373-7319|merchant-116|216.45935614010082|     YUM|     IT|2026-02-05 21:02:...|
|5b456de8-a599-f35...|5661-9610-7781-5045|merchant-161|229.27723350946005|     MRO|     MT|2026-

In [9]:
# Load the table through the "iceberg" data source, passing the
# catalog-qualified identifier to load(), then print a preview of the rows.
_iceberg_reader = spark.read.format("iceberg")
df = _iceberg_reader.load("hiverest.cards_db.card_transaction_t")
df.show()

+--------------------+-------------------+------------+------------------+--------+-------+--------------------+
|       transactionId|         cardNumber|  merchantId|            amount|currency|country|           timestamp|
+--------------------+-------------------+------------+------------------+--------+-------+--------------------+
|d59d184a-b3fb-c38...|5467-8762-2483-2686|merchant-171|121.05412245429775|     CVE|     DK|2026-02-05 21:14:...|
|9b3c9c1c-ab48-8e7...|6599-1192-3504-8135|merchant-147|229.58226692891316|     MXN|     AT|2026-02-05 21:14:...|
|69a4d2d1-cece-908...|5651-4710-7491-9839|merchant-124| 73.22309423606737|     LUF|     DK|2026-02-05 21:14:...|
|9c5abbcd-ad61-675...|2720-1909-5333-4654|merchant-151| 89.47145630057767|     ZWG|     CH|2026-02-05 21:14:...|
|a72effd8-5be0-70f...|6304-0797-5981-8945|merchant-185|219.91375019895293|     USD|     NL|2026-02-05 21:14:...|
|332fc55f-ac88-f55...|5020-4144-9641-2739|merchant-103|202.90907073399833|     BGL|     PT|2026-