diff --git a/envs/s3-spark-delta-sharing-minio/.gitignore b/envs/s3-spark-delta-sharing-minio/.gitignore
new file mode 100644
index 0000000..34d4d07
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/.gitignore
@@ -0,0 +1 @@
+.s3-mount
\ No newline at end of file
diff --git a/envs/s3-spark-delta-sharing-minio/.gitkeep b/envs/s3-spark-delta-sharing-minio/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/envs/s3-spark-delta-sharing-minio/.whirl.env b/envs/s3-spark-delta-sharing-minio/.whirl.env
new file mode 100644
index 0000000..4e396a8
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/.whirl.env
@@ -0,0 +1,16 @@
+AWS_ACCESS_KEY_ID=qwerty
+AWS_SECRET_ACCESS_KEY=qwerty123
+AWS_SERVER=s3server
+AWS_PORT=9000
+DEMO_BUCKET=demo-s3-output
+
+# Spark variables
+SPARK_VERSION=3.1.1
+DELTA_VERSION=1.0.0
+DELTA_SHARING_VERSION=0.2.0
+
+# Airflow env vars
+AIRFLOW__CORE__EXPOSE_CONFIG=True
+AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False
+AIRFLOW__CORE__LOAD_EXAMPLES=False
+AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True
diff --git a/envs/s3-spark-delta-sharing-minio/compose.setup.d/01_check_available_memory.sh b/envs/s3-spark-delta-sharing-minio/compose.setup.d/01_check_available_memory.sh
new file mode 100644
index 0000000..f70f0a9
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/compose.setup.d/01_check_available_memory.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+function check_docker_mem() {
+ echo "==============================================="
+ echo "== Check if there is enough available memory =="
+ echo "==============================================="
+ MEM_12_POINT_5_GB=$(((1024 * 1024 * 1024 * 25)/2))
+ AVAILABLE_MEM=$(docker info -f "{{json .MemTotal}}")
+
+ if [ "${AVAILABLE_MEM}" -lt "${MEM_12_POINT_5_GB}" ]; then
+ echo "NOT ENOUGH MEMORY AVAILABLE ($(bc <<< "scale=1; $AVAILABLE_MEM / 1024 / 1024 / 1024")). Need at least 12.5GB"
+ exit 12;
+ fi
+}
+
+check_docker_mem
\ No newline at end of file
diff --git a/envs/s3-spark-delta-sharing-minio/compose.setup.d/02_clean_s3_mount_dir.sh b/envs/s3-spark-delta-sharing-minio/compose.setup.d/02_clean_s3_mount_dir.sh
new file mode 100644
index 0000000..4fd82a4
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/compose.setup.d/02_clean_s3_mount_dir.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+function empty_s3_dir() {
+ echo "================================"
+ echo "== Cleanup local S3 mount dir =="
+ echo "================================"
+ local SCRIPT_DIR=$( dirname ${BASH_SOURCE[0]} )
+ S3_MOUNT_DIR="${SCRIPT_DIR}/../.s3-mount"
+
+ if [ "$(ls -A ${S3_MOUNT_DIR})" ]; then
+ echo "${S3_MOUNT_DIR} is not empty. Clearing NOW!!"
+ find ${S3_MOUNT_DIR} -mindepth 1 -delete
+ else
+ echo "${S3_MOUNT_DIR} is empty. Continue"
+ fi
+}
+
+empty_s3_dir
\ No newline at end of file
diff --git a/envs/s3-spark-delta-sharing-minio/config/core-site.xml b/envs/s3-spark-delta-sharing-minio/config/core-site.xml
new file mode 100644
index 0000000..03c76f1
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/config/core-site.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+  <property>
+    <name>fs.s3.awsAccessKeyId</name>
+    <value>qwerty</value>
+  </property>
+
+  <property>
+    <name>fs.s3.awsSecretAccessKey</name>
+    <value>qwerty123</value>
+  </property>
+
+  <property>
+    <name>fs.s3n.awsAccessKeyId</name>
+    <value>qwerty</value>
+  </property>
+
+  <property>
+    <name>fs.s3n.awsSecretAccessKey</name>
+    <value>qwerty123</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.access.key</name>
+    <value>qwerty</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.secret.key</name>
+    <value>qwerty123</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.connection.ssl.enabled</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.endpoint</name>
+    <value>s3server:9000</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.path.style.access</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.impl</name>
+    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.s3.impl</name>
+    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+  </property>
+
+</configuration>
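The same S3A settings can also be passed to a Spark session programmatically through spark.hadoop.* properties. A minimal PySpark sketch, assuming the hadoop-aws and AWS SDK jars are on the classpath; endpoint, credentials and bucket come from the files in this patch, the rest is illustrative:

    from pyspark.sql import SparkSession

    # Mirrors config/core-site.xml: point S3A at the MinIO container instead of AWS
    spark = (
        SparkSession.builder.appName("minio-smoke-test")
        .config("spark.hadoop.fs.s3a.endpoint", "s3server:9000")
        .config("spark.hadoop.fs.s3a.access.key", "qwerty")
        .config("spark.hadoop.fs.s3a.secret.key", "qwerty123")
        .config("spark.hadoop.fs.s3a.path.style.access", "true")
        .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
        .getOrCreate()
    )

    # Write a trivial dataset to the demo bucket to confirm connectivity
    spark.range(10).write.mode("overwrite").parquet("s3a://demo-s3-output/output/smoke-test/")
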
diff --git a/envs/s3-spark-delta-sharing-minio/config/delta-sharing.yml b/envs/s3-spark-delta-sharing-minio/config/delta-sharing.yml
new file mode 100644
index 0000000..8ef0588
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/config/delta-sharing.yml
@@ -0,0 +1,33 @@
+# The format version of this config file
+version: 1
+# Config shares/schemas/tables to share
+shares:
+- name: "airflow"
+ schemas:
+ - name: "spark"
+ tables:
+ - name: "table1"
+ location: "s3a://demo-s3-output/output/data/demo/spark/20210614/"
+ - name: "cars"
+ location: "s3a://demo-s3-output/output/data/demo/spark/cars/"
+ - name: "cars-all"
+ location: "s3a://demo-s3-output/output/data/demo/spark/cars-all/"
+ - name: "cars-python"
+ location: "s3a://demo-s3-output/output/data/demo/spark/cars-python/"
+# Set the host name that the server will use
+host: "0.0.0.0"
+# Set the port that the server will listen on
+port: 8080
+# Set the url prefix for the REST APIs
+endpoint: "/delta-sharing"
+# Set the timeout of S3 presigned url in seconds
+preSignedUrlTimeoutSeconds: 900
+# How many tables to cache in the server
+deltaTableCacheSize: 10
+# Whether we can accept working with a stale version of the table. This is useful when sharing
+# static tables that will never be changed.
+stalenessAcceptable: false
+# Whether to evaluate user provided `predicateHints`
+evaluatePredicateHints: false
+authorization:
+ bearerToken: authTokenDeltaSharing432
\ No newline at end of file
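
A minimal sketch of listing the tables exposed by this share config with the delta-sharing Python client, assuming the stack is up; localhost:38080 is the host port mapped for the delta service in docker-compose.yml below, and the local profile file name is illustrative:

    import json

    import delta_sharing

    # Profile matching config/delta-sharing.yml and the published host port
    profile = {
        "shareCredentialsVersion": 1,
        "endpoint": "http://localhost:38080/delta-sharing/",
        "bearerToken": "authTokenDeltaSharing432",
    }
    with open("delta.profile", "w") as f:
        json.dump(profile, f)

    client = delta_sharing.SharingClient("delta.profile")
    for table in client.list_all_tables():
        print(table)  # e.g. the cars/cars-all/cars-python tables in share 'airflow'
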
diff --git a/envs/s3-spark-delta-sharing-minio/config/log4j.properties b/envs/s3-spark-delta-sharing-minio/config/log4j.properties
new file mode 100644
index 0000000..8910b5a
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/config/log4j.properties
@@ -0,0 +1,337 @@
+# This sets the global logging level and specifies the appenders
+log4j.rootLogger=INFO, console
+
+# settings for the console appender
+# log4j.appender.myConsoleAppender=org.apache.log4j.ConsoleAppender
+# log4j.appender.myConsoleAppender.layout=org.apache.log4j.PatternLayout
+# log4j.appender.myConsoleAppender.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.apache.hadoop=INFO
+# log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+# log4j.logger.org.apache.hadoop.fs.s3=DEBUG
+# log4j.logger.org.apache.hadoop.fs=DEBUG
+# log4j.logger.org.apache.hadoop.conf=DEBUG
+log4j.logger.io.delta=INFO
+
+log4j.logger.com.amazonaws=INFO
+log4j.logger.com.amazonaws.http.conn.ssl=INFO
+# log4j.logger.com.amazonaws.internal=INFO
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to log normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=DEBUG
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hadoop configuration logging
+#
+
+# Uncomment the following line to turn off configuration deprecation warnings.
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# NameNode metrics logging.
+# The default is to retain two namenode-metrics.log files up to 64MB each.
+#
+namenode.metrics.logger=INFO,NullAppender
+log4j.logger.NameNodeMetricsLog=${namenode.metrics.logger}
+log4j.additivity.NameNodeMetricsLog=false
+log4j.appender.NNMETRICSRFA=org.apache.log4j.RollingFileAppender
+log4j.appender.NNMETRICSRFA.File=${hadoop.log.dir}/namenode-metrics.log
+log4j.appender.NNMETRICSRFA.layout=org.apache.log4j.PatternLayout
+log4j.appender.NNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n
+log4j.appender.NNMETRICSRFA.MaxBackupIndex=1
+log4j.appender.NNMETRICSRFA.MaxFileSize=64MB
+
+#
+# DataNode metrics logging.
+# The default is to retain two datanode-metrics.log files up to 64MB each.
+#
+datanode.metrics.logger=INFO,NullAppender
+log4j.logger.DataNodeMetricsLog=${datanode.metrics.logger}
+log4j.additivity.DataNodeMetricsLog=false
+log4j.appender.DNMETRICSRFA=org.apache.log4j.RollingFileAppender
+log4j.appender.DNMETRICSRFA.File=${hadoop.log.dir}/datanode-metrics.log
+log4j.appender.DNMETRICSRFA.layout=org.apache.log4j.PatternLayout
+log4j.appender.DNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n
+log4j.appender.DNMETRICSRFA.MaxBackupIndex=1
+log4j.appender.DNMETRICSRFA.MaxFileSize=64MB
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+
+# AWS SDK & S3A FileSystem
+#log4j.logger.com.amazonaws=ERROR
+log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
+#log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+
+#
+# shuffle connection log from shuffleHandler
+# Uncomment the following line to enable logging of shuffle connections
+# log4j.logger.org.apache.hadoop.mapred.ShuffleHandler.audit=DEBUG
+
+#
+# Yarn ResourceManager Application Summary Log
+#
+# Set the ResourceManager summary log filename
+yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# To enable AppSummaryLogging for the RM,
+# set yarn.server.resourcemanager.appsummary.logger to
+# ,RMSUMMARY in hadoop-env.sh
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+# - hadoop.log.dir (Hadoop Log directory)
+# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+log4j.appender.RMSUMMARY.MaxFileSize=256MB
+log4j.appender.RMSUMMARY.MaxBackupIndex=20
+log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+
+#
+# YARN ResourceManager audit logging
+#
+rm.audit.logger=INFO,NullAppender
+rm.audit.log.maxfilesize=256MB
+rm.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger=${rm.audit.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger=false
+log4j.appender.RMAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RMAUDIT.File=${hadoop.log.dir}/rm-audit.log
+log4j.appender.RMAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RMAUDIT.MaxFileSize=${rm.audit.log.maxfilesize}
+log4j.appender.RMAUDIT.MaxBackupIndex=${rm.audit.log.maxbackupindex}
+
+#
+# YARN NodeManager audit logging
+#
+nm.audit.logger=INFO,NullAppender
+nm.audit.log.maxfilesize=256MB
+nm.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger=${nm.audit.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger=false
+log4j.appender.NMAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.NMAUDIT.File=${hadoop.log.dir}/nm-audit.log
+log4j.appender.NMAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.NMAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.NMAUDIT.MaxFileSize=${nm.audit.log.maxfilesize}
+log4j.appender.NMAUDIT.MaxBackupIndex=${nm.audit.log.maxbackupindex}
+
+# HS audit log configs
+#mapreduce.hs.audit.logger=INFO,HSAUDIT
+#log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
+#log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
+#log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
+#log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
+
+# Http Server Request Logs
+#log4j.logger.http.requests.namenode=INFO,namenoderequestlog
+#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
+#log4j.appender.namenoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.datanode=INFO,datanoderequestlog
+#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
+#log4j.appender.datanoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
+#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
+#log4j.appender.resourcemanagerrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
+#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
+#log4j.appender.jobhistoryrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
+#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
+#log4j.appender.nodemanagerrequestlog.RetainDays=3
+
+# WebHdfs request log on datanodes
+# Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to
+# direct the log to a separate file.
+#datanode.webhdfs.logger=INFO,console
+#log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger}
+#log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log
+#log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n
+#log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd
+
+
+# Appender for viewing information for errors and warnings
+yarn.ewma.cleanupInterval=300
+yarn.ewma.messageAgeLimitSeconds=86400
+yarn.ewma.maxUniqueMessages=250
+log4j.appender.EWMA=org.apache.hadoop.yarn.util.Log4jWarningErrorMetricsAppender
+log4j.appender.EWMA.cleanupInterval=${yarn.ewma.cleanupInterval}
+log4j.appender.EWMA.messageAgeLimitSeconds=${yarn.ewma.messageAgeLimitSeconds}
+log4j.appender.EWMA.maxUniqueMessages=${yarn.ewma.maxUniqueMessages}
+
+#
+# Fair scheduler state dump
+#
+# Use following logger to dump the state to a separate file
+
+#log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=DEBUG,FSSTATEDUMP
+#log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=false
+#log4j.appender.FSSTATEDUMP=org.apache.log4j.RollingFileAppender
+#log4j.appender.FSSTATEDUMP.File=${hadoop.log.dir}/fairscheduler-statedump.log
+#log4j.appender.FSSTATEDUMP.layout=org.apache.log4j.PatternLayout
+#log4j.appender.FSSTATEDUMP.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+#log4j.appender.FSSTATEDUMP.MaxFileSize=${hadoop.log.maxfilesize}
+#log4j.appender.FSSTATEDUMP.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+# Log levels of third-party libraries
+log4j.logger.org.apache.commons.beanutils=WARN
\ No newline at end of file
diff --git a/envs/s3-spark-delta-sharing-minio/docker-compose.yml b/envs/s3-spark-delta-sharing-minio/docker-compose.yml
new file mode 100644
index 0000000..2d75eb1
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/docker-compose.yml
@@ -0,0 +1,108 @@
+version: '3'
+
+services:
+ airflow:
+ image: docker-whirl-airflow:py-${PYTHON_VERSION}-local
+ command: ["singlemachine"]
+ ports:
+ - '5000:5000' # HTTP (Airflow Web UI)
+ env_file:
+ - .whirl.env
+ environment:
+ - AIRFLOW__API__AUTH_BACKEND
+ volumes:
+ - ${DAG_FOLDER}:/opt/airflow/dags/$PROJECTNAME
+ - ${ENVIRONMENT_FOLDER}/whirl.setup.d:${WHIRL_SETUP_FOLDER}/env.d/
+ - ${DAG_FOLDER}/whirl.setup.d:${WHIRL_SETUP_FOLDER}/dag.d/
+ - ${MOCK_DATA_FOLDER}:/mock-data
+ depends_on:
+ - s3server
+ - sparkmaster
+ links:
+ - s3server:${DEMO_BUCKET}.s3server
+
+
+ s3server:
+ image: minio/minio:RELEASE.2021-08-31T05-46-54Z
+ command:
+ - server
+ - "--address=0.0.0.0:9000"
+ - "/tmp/minio"
+ ports:
+ - 9000:9000
+ environment:
+ - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+ - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
+ volumes:
+ - ./.s3-mount:/tmp/minio
+
+ delta:
+ image: deltaio/delta-sharing-server:${DELTA_SHARING_VERSION}
+ command: ["--config", "/opt/docker/conf/delta-sharing.yml"]
+ ports:
+ - 38080:8080
+ environment:
+ - HADOOP_ROOT_LOGGER=ERROR,console
+ - JAVA_OPTS=-Dlog4j.debug=false -Dlog4j.logLevel=INFO -Dlog4j.configuration=file:///opt/docker/conf/log4j.properties -Dlog4j.configurationFile=file:///opt/docker/conf/log4j.properties
+ volumes:
+ - ./config/:/opt/docker/conf/
+
+ sparkmaster:
+ build:
+ context: ${DOCKER_CONTEXT_FOLDER}/aws-spark
+ dockerfile: Dockerfile
+ args:
+ - SPARK_VERSION=${SPARK_VERSION}
+ environment:
+ - SPARK_NO_DAEMONIZE=true
+ - AWS_ACCESS_KEY_ID
+ - AWS_SECRET_ACCESS_KEY
+ - AWS_SERVER
+ - AWS_PORT
+ ports:
+ - 7077:7077
+ - 18080:8080
+ entrypoint:
+ - /usr/spark/sbin/start-master.sh
+ links:
+ - s3server:${DEMO_BUCKET}.s3server
+
+ sparkworker:
+ build:
+ context: ${DOCKER_CONTEXT_FOLDER}/aws-spark
+ dockerfile: Dockerfile
+ args:
+ - SPARK_VERSION=${SPARK_VERSION}
+ environment:
+ - SPARK_NO_DAEMONIZE=true
+ - AWS_ACCESS_KEY_ID
+ - AWS_SECRET_ACCESS_KEY
+ - AWS_SERVER
+ - AWS_PORT
+ ports:
+ - 18081:8081
+ entrypoint:
+ - /usr/spark/sbin/start-slave.sh
+ - spark://sparkmaster:7077
+ - "-m"
+ - "8G"
+ depends_on:
+ - sparkmaster
+ links:
+ - s3server:${DEMO_BUCKET}.s3server
+
+ sparkshell:
+ build:
+ context: ${DOCKER_CONTEXT_FOLDER}/aws-spark
+ dockerfile: Dockerfile
+ args:
+ - SPARK_VERSION=${SPARK_VERSION}
+ environment:
+ - SPARK_NO_DAEMONIZE=true
+ entrypoint:
+ - /usr/bin/tail
+ - "-f"
+ - /dev/null
+ depends_on:
+ - sparkmaster
+
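Once the containers are up, MinIO can be checked from the host with something like the boto3 snippet below; boto3 itself is an assumption (it is not installed by this environment), while the endpoint and credentials come from this compose file and .whirl.env:

    import boto3

    # Talk to the MinIO container published on localhost:9000
    s3 = boto3.client(
        "s3",
        endpoint_url="http://localhost:9000",
        aws_access_key_id="qwerty",
        aws_secret_access_key="qwerty123",
    )
    print([b["Name"] for b in s3.list_buckets()["Buckets"]])
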
diff --git a/envs/s3-spark-delta-sharing-minio/whirl.setup.d/01_add_connection_s3.sh b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/01_add_connection_s3.sh
new file mode 100644
index 0000000..d8d8e3b
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/01_add_connection_s3.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+echo "=================="
+echo "== Configure S3 =="
+echo "=================="
+
+pip install awscli awscli-plugin-endpoint
+
+echo -e "$AWS_ACCESS_KEY_ID\n$AWS_SECRET_ACCESS_KEY\n\n" | aws configure
+aws configure set plugins.endpoint awscli_plugin_endpoint
+aws configure set default.s3.endpoint_url http://${AWS_SERVER}:${AWS_PORT}
+aws configure set default.s3api.endpoint_url http://${AWS_SERVER}:${AWS_PORT}
+
+echo "======================"
+echo "== Create S3 Bucket =="
+echo "======================"
+while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' http://${AWS_SERVER}:${AWS_PORT}/minio/health/live)" != "200" ]]; do
+ echo "Waiting for ${AWS_SERVER} to come up on port ${AWS_PORT}..."
+ sleep 2;
+done
+
+echo "creating bucket"
+aws s3api create-bucket --bucket ${DEMO_BUCKET}
diff --git a/envs/s3-spark-delta-sharing-minio/whirl.setup.d/02_add_spark_config.sh b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/02_add_spark_config.sh
new file mode 100644
index 0000000..1b19ad1
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/02_add_spark_config.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+echo "============================"
+echo "======== Add java =========="
+echo "============================"
+
+sudo apt-get update && sudo apt-get install -y openjdk-11-jre
+export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
+
+echo "============================"
+echo "== Configure Spark config =="
+echo "============================"
+airflow connections add spark_default \
+ --conn-type spark \
+ --conn-host "spark://sparkmaster:7077" \
+ --conn-extra "{\"queue\": \"root.default\", \"deploy-mode\": \"client\"}"
+
+SDK_AWS_VERSION=1.11.563
+HADOOP_AWS_VERSION=3.2.0
+
+POSTGRES_JDBC_CHECKSUM=7ffa46f8c619377cdebcd17721b6b21ecf6659850179f96fec3d1035cf5a0cdc
+SDK_AWS_CHECKSUM=b323857424e133b44c1156a184dc3a83fa152b656f2e320a71b5637a854822d5
+HADOOP_AWS_CHECKSUM=ceac8724f8bb47d2f039eaecf4ee147623b46e4bbf26ddf73a9bb8808743655e
+
+pip install pyspark==${SPARK_VERSION}
+pip install apache-airflow-providers-apache-spark
+
+export SPARK_HOME=$(python ~/.local/bin/find_spark_home.py)
+echo "-------------------------------"
+echo "SPARK_HOME set to ${SPARK_HOME}"
+echo "-------------------------------"
+
+curl -o ${SPARK_HOME}/jars/postgresql-42.2.5.jar https://jdbc.postgresql.org/download/postgresql-42.2.5.jar && \
+ echo "$POSTGRES_JDBC_CHECKSUM ${SPARK_HOME}/jars/postgresql-42.2.5.jar" | sha256sum -c -
+
+curl -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-${SDK_AWS_VERSION}.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${SDK_AWS_VERSION}/aws-java-sdk-bundle-${SDK_AWS_VERSION}.jar && \
+ echo "$SDK_AWS_CHECKSUM ${SPARK_HOME}/jars/aws-java-sdk-bundle-${SDK_AWS_VERSION}.jar" | sha256sum -c -
+
+curl -o ${SPARK_HOME}/jars/hadoop-aws-${HADOOP_AWS_VERSION}.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_AWS_VERSION}/hadoop-aws-${HADOOP_AWS_VERSION}.jar && \
+ echo "$HADOOP_AWS_CHECKSUM ${SPARK_HOME}/jars/hadoop-aws-${HADOOP_AWS_VERSION}.jar" | sha256sum -c -
diff --git a/envs/s3-spark-delta-sharing-minio/whirl.setup.d/03_add_delta_config.sh b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/03_add_delta_config.sh
new file mode 100644
index 0000000..20f98ff
--- /dev/null
+++ b/envs/s3-spark-delta-sharing-minio/whirl.setup.d/03_add_delta_config.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+echo "=============================================="
+echo "== Configure Delta and Delta Sharing config =="
+echo "=============================================="
+sudo apt-get update && sudo apt-get install -y git
+
+pip install delta-spark==${DELTA_VERSION}
+pip install delta-sharing==${DELTA_SHARING_VERSION}
+# Install fsspec from PR until this is properly released
+# pip install git+https://github.com/intake/filesystem_spec.git@refs/pull/718/head
+
+echo '{
+ "shareCredentialsVersion": 1,
+ "endpoint": "http://delta:8080/delta-sharing/",
+ "bearerToken": "authTokenDeltaSharing432"
+}' > /opt/airflow/delta.profile
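
A minimal sketch of consuming one of the shared tables with the profile written above; airflow.spark.cars is defined in config/delta-sharing.yml, and loading assumes a DAG run has already produced that table:

    import delta_sharing

    # URL format: <profile-file>#<share>.<schema>.<table>
    table_url = "/opt/airflow/delta.profile#airflow.spark.cars"
    df = delta_sharing.load_as_pandas(table_url)
    print(df.head())
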
diff --git a/envs/s3-spark-delta-sharing/docker-compose.yml b/envs/s3-spark-delta-sharing/docker-compose.yml
index 0350bf5..cd62fcf 100644
--- a/envs/s3-spark-delta-sharing/docker-compose.yml
+++ b/envs/s3-spark-delta-sharing/docker-compose.yml
@@ -8,6 +8,8 @@ services:
- '5000:5000' # HTTP (Airflow Web UI)
env_file:
- .whirl.env
+ environment:
+ - AIRFLOW__API__AUTH_BACKEND
volumes:
- ${DAG_FOLDER}:/opt/airflow/dags/$PROJECTNAME
- ${ENVIRONMENT_FOLDER}/whirl.setup.d:${WHIRL_SETUP_FOLDER}/env.d/
diff --git a/examples/spark-delta-sharing/.whirl.env b/examples/spark-delta-sharing/.whirl.env
index 1aa43ae..95adcb1 100644
--- a/examples/spark-delta-sharing/.whirl.env
+++ b/examples/spark-delta-sharing/.whirl.env
@@ -1,3 +1,4 @@
+AIRFLOW_VERSION=2.1.3
WHIRL_ENVIRONMENT=s3-spark-delta-sharing
MOCK_DATA_FOLDER=$(pwd)/mock-data
diff --git a/whirl b/whirl
index 617d7d6..a575113 100755
--- a/whirl
+++ b/whirl
@@ -143,6 +143,9 @@ start() {
done
if [[ "$(curl -s -o /dev/null -w %\{http_code\} -X PATCH -H 'Content-Type: application/json' --user admin:admin "http://localhost:${AIRFLOW_UI_PORT}/api/v1/dags/${DAG_ID}?update_mask=is_paused" -d '{ "is_paused": false }')" != "200" ]]; then
echo "Unable to unpause dag with id ${DAG_ID}."
+ echo "Command: curl -X PATCH -H 'Content-Type: application/json' --user admin:admin \"http://localhost:${AIRFLOW_UI_PORT}/api/v1/dags/${DAG_ID}?update_mask=is_paused\" -d '{ \"is_paused\": false }'"
+ echo "Output:"
+ curl -X PATCH -H 'Content-Type: application/json' --user admin:admin "http://localhost:${AIRFLOW_UI_PORT}/api/v1/dags/${DAG_ID}?update_mask=is_paused" -d '{ "is_paused": false }'
stop
exit 3
else