Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
672 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.s3-mount |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
AWS_ACCESS_KEY_ID=qwerty | ||
AWS_SECRET_ACCESS_KEY=qwerty123 | ||
AWS_SERVER=s3server | ||
AWS_PORT=9000 | ||
DEMO_BUCKET=demo-s3-output | ||
|
||
# Spark variables | ||
SPARK_VERSION=3.1.1 | ||
DELTA_VERSION=1.0.0 | ||
DELTA_SHARING_VERSION=0.2.0 | ||
|
||
# Airflow env vars | ||
AIRFLOW__CORE__EXPOSE_CONFIG=True | ||
AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False | ||
AIRFLOW__CORE__LOAD_EXAMPLES=False | ||
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=True |
16 changes: 16 additions & 0 deletions
16
envs/s3-spark-delta-sharing-minio/compose.setup.d/01_check_available_memory.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Verify that the Docker daemon has at least 12.5 GiB of memory available.
#
# Reads the daemon's total memory (in bytes) from `docker info` and compares it
# with the required minimum.
#
# Globals set: MEM_12_POINT_5_GB (required bytes), AVAILABLE_MEM (reported bytes).
# Exits the calling shell with status 12 when the reported memory is below the
# minimum, or when the value cannot be read from Docker at all.
function check_docker_mem() {
    echo "==============================================="
    echo "== Check if there is enough available memory =="
    echo "==============================================="
    # 12.5 GiB in bytes, written as (25 GiB / 2) to stay in integer arithmetic.
    MEM_12_POINT_5_GB=$(((1024 * 1024 * 1024 * 25) / 2))
    AVAILABLE_MEM=$(docker info -f "{{json .MemTotal}}")

    # Without this guard an empty/non-numeric value makes `[ ... -lt ... ]` fail
    # with a syntax error, which the `if` would treat as "enough memory".
    if ! [[ "${AVAILABLE_MEM}" =~ ^[0-9]+$ ]]; then
        echo "COULD NOT DETERMINE AVAILABLE MEMORY FROM 'docker info' (got: '${AVAILABLE_MEM}'). Is the Docker daemon running?" >&2
        exit 12
    fi

    if [ "${AVAILABLE_MEM}" -lt "${MEM_12_POINT_5_GB}" ]; then
        # Render the byte count as GiB with one decimal using integer math only,
        # so the message does not depend on `bc` being installed.
        local available_tenths=$(( AVAILABLE_MEM * 10 / (1024 * 1024 * 1024) ))
        local available_human
        available_human=$(printf '%d.%d' $(( available_tenths / 10 )) $(( available_tenths % 10 )))
        echo "NOT ENOUGH MEMORY AVAILABLE (${available_human}GB). Need at least 12.5GB"
        exit 12
    fi
}
|
||
check_docker_mem |
18 changes: 18 additions & 0 deletions
18
envs/s3-spark-delta-sharing-minio/compose.setup.d/02_clean_s3_mount_dir.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Remove everything inside the local S3 mount directory, keeping the directory.
#
# Arguments:
#   $1 - optional directory to clean; defaults to "../.s3-mount" relative to
#        the directory of this script.
# Globals set: S3_MOUNT_DIR (the directory that was inspected).
# Returns non-zero if the script directory cannot be resolved or deletion fails.
function empty_s3_dir() {
    echo "================================"
    echo "== Cleanup local S3 mount dir =="
    echo "================================"
    # Declare and assign separately so a failing `dirname` is not masked by `local`.
    local SCRIPT_DIR
    SCRIPT_DIR=$( dirname -- "${BASH_SOURCE[0]}" ) || return 1
    S3_MOUNT_DIR="${1:-${SCRIPT_DIR}/../.s3-mount}"

    if [ ! -d "${S3_MOUNT_DIR}" ]; then
        # Nothing to clean; report it instead of letting `ls` fail noisily.
        echo "${S3_MOUNT_DIR} does not exist. Continue"
        return 0
    fi

    # All expansions are quoted: an unquoted path containing spaces or glob
    # characters would be split and could make `find -delete` hit other files.
    if [ -n "$(ls -A -- "${S3_MOUNT_DIR}")" ]; then
        echo "${S3_MOUNT_DIR} is not empty. Clearing NOW!!"
        find "${S3_MOUNT_DIR}" -mindepth 1 -delete
    else
        echo "${S3_MOUNT_DIR} is empty. Continue"
    fi
}
|
||
empty_s3_dir |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
<?xml version="1.0"?> | ||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | ||
|
||
<configuration> | ||
<property> | ||
<name>fs.s3.awsAccessKeyId</name> | ||
<value>qwerty</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3.awsSecretAccessKey</name> | ||
<value>qwerty123</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3n.awsAccessKeyId</name> | ||
<value>qwerty</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3n.awsSecretAccessKey</name> | ||
<value>qwerty123</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.access.key</name> | ||
<value>qwerty</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.secret.key</name> | ||
<value>qwerty123</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.connection.ssl.enabled</name> | ||
<value>false</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.endpoint</name> | ||
<value>s3server:9000</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.path.style.access</name> | ||
<value>true</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3a.impl</name> | ||
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value> | ||
</property> | ||
|
||
<property> | ||
<name>fs.s3.impl</name> | ||
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value> | ||
</property> | ||
|
||
</configuration> |
33 changes: 33 additions & 0 deletions
33
envs/s3-spark-delta-sharing-minio/config/delta-sharing.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# The format version of this config file | ||
version: 1 | ||
# Configure the shares/schemas/tables to share | ||
shares: | ||
- name: "airflow" | ||
schemas: | ||
- name: "spark" | ||
tables: | ||
- name: "table1" | ||
location: "s3a://demo-s3-output/output/data/demo/spark/20210614/" | ||
- name: "cars" | ||
location: "s3a://demo-s3-output/output/data/demo/spark/cars/" | ||
- name: "cars-all" | ||
location: "s3a://demo-s3-output/output/data/demo/spark/cars-all/" | ||
- name: "cars-python" | ||
location: "s3a://demo-s3-output/output/data/demo/spark/cars-python/" | ||
# Set the host name that the server will use | ||
host: "0.0.0.0" | ||
# Set the port that the server will listen on | ||
port: 8080 | ||
# Set the url prefix for the REST APIs | ||
endpoint: "/delta-sharing" | ||
# Set the timeout of S3 presigned url in seconds | ||
preSignedUrlTimeoutSeconds: 900 | ||
# How many tables to cache in the server | ||
deltaTableCacheSize: 10 | ||
# Whether we can accept working with a stale version of the table. This is useful when sharing | ||
# static tables that will never be changed. | ||
stalenessAcceptable: false | ||
# Whether to evaluate user provided `predicateHints` | ||
evaluatePredicateHints: false | ||
authorization: | ||
bearerToken: authTokenDeltaSharing432 |
Oops, something went wrong.