In [1]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql import functions as F
from pyspark.sql import Window

# Build (or reuse) the SparkSession shared by every cell below. It is wired for
# Delta Lake tables stored on an S3-compatible MinIO endpoint and registered in
# a Hive metastore.
#
# The object-store credentials are taken from the environment so they can be
# overridden without editing the notebook. The fallbacks are the local
# development values only -- never put real secrets here.
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "andreyolv")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "andreyolv")

spark = (SparkSession.builder
            .appName('DeltaMinIO')
            # JARs resolved at session start-up: S3A filesystem, AWS SDK, Delta Lake.
            .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:3.3.1,"
                                            "com.amazonaws:aws-java-sdk-bundle:1.12.709,"
                                            "io.delta:delta-core_2.12:2.3.0")
            # S3 / Minio
            .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
            .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
            .config("spark.hadoop.fs.s3a.endpoint", "http://minio.minio:9000")
            .config("spark.hadoop.fs.s3a.path.style.access", True)
            .config("spark.hadoop.fs.s3a.fast.upload", True)
            .config("spark.hadoop.fs.s3a.multipart.size", 104857600)  # 100 MiB
            # Needs the `spark.hadoop.` prefix, like every other fs.s3a.* option
            # here, so that it is forwarded to the Hadoop configuration.
            .config("spark.hadoop.fs.s3a.connection.maximum", 100)
            .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
            # Delta
            .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore")
            .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
            .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
            # Hive MetaStore
            .config("spark.sql.catalogImplementation", "hive")
            .config("spark.sql.hive.thriftServer.singleSession", "false")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .config("spark.hive.metastore.uris", "thrift://metastore.iceberg:9083")
            .config("spark.hive.metastore.schema.verification", "false")
            .getOrCreate()
        )

In [6]:
# List the catalogs visible to this session and print them as a table.
catalogs_df = spark.sql("""
SHOW CATALOGS;
""")
catalogs_df.show()

+-------------+
|      catalog|
+-------------+
|spark_catalog|
+-------------+



In [3]:
# Create the schema that holds the Delta tables, rooted at the lakehouse bucket.
# IF NOT EXISTS keeps the cell idempotent, so re-running the notebook does not
# fail once the schema is already there (same guard as the CREATE TABLE cell).
spark.sql("""
CREATE SCHEMA IF NOT EXISTS delta_db 
LOCATION 's3a://lakehouse/delta/';
""")

DataFrame[]

In [8]:
# DDL for the demo Delta table: three columns, stored at an explicit S3A path.
# IF NOT EXISTS makes the statement safe to run repeatedly.
create_table1_ddl = """
CREATE TABLE IF NOT EXISTS spark_catalog.delta_db.table1 (
revenue int,
department string,
boss string)
USING DELTA
location 's3a://lakehouse/delta/table1'"""

spark.sql(create_table1_ddl)

DataFrame[]

In [9]:
# Seed the table with one example row.
# INSERT INTO appends, so running this cell again adds another copy of the row.
insert_row_sql = """
INSERT INTO spark_catalog.delta_db.table1 (revenue, department, boss)
VALUES (10000, 'Sales', 'John Smith');

"""

spark.sql(insert_row_sql)

DataFrame[]

In [11]:
# Read the table back through the catalog and print its rows.
table1_df = spark.sql("""
SELECT * FROM spark_catalog.delta_db.table1;
""")
table1_df.show()

+-------+----------+----------+
|revenue|department|      boss|
+-------+----------+----------+
|  10000|     Sales|John Smith|
+-------+----------+----------+

