Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
aec82ae
[feat](paimon) support jdbc catalog type
xylaaaaa Mar 6, 2026
98f94a8
[test](paimon) add jdbc catalog regression case
xylaaaaa Mar 6, 2026
94d642c
[feat](paimon) align jdbc property aliases
xylaaaaa Mar 6, 2026
4a4b729
[test](paimon) make jdbc regression env aware
xylaaaaa Mar 6, 2026
a604130
[test](paimon) seed jdbc regression data via spark
xylaaaaa Mar 6, 2026
a2afce4
[test](paimon) avoid spark jdbc seed for regression
xylaaaaa Mar 6, 2026
465746c
[test](paimon) add jdbc regression output
xylaaaaa Mar 6, 2026
57adca8
[fix](paimon) address jdbc catalog PR review comments
xylaaaaa Mar 9, 2026
3aee75e
docs: annotate slow external regression cases with PR links
xylaaaaa Mar 9, 2026
0c14229
[fix](paimon) add jdbc driver for spark seed catalog in regression
xylaaaaa Mar 9, 2026
79b25eb
[test] disable paimon jdbc lock for spark seed catalog
xylaaaaa Mar 9, 2026
e2dfb69
[doc] remove accidentally committed external regression note
xylaaaaa Mar 9, 2026
75b828c
[test] align paimon jdbc catalog-key in spark seed
xylaaaaa Mar 10, 2026
85aa5d6
docs: summarize databricks tpch results
xylaaaaa Mar 15, 2026
b11a57d
docs: translate tpch result summary to chinese
xylaaaaa Mar 15, 2026
4bc1c8b
docs: fold q20 rerun into medium summary
xylaaaaa Mar 15, 2026
660925c
docs: summarize databricks tpcds metric backfill
xylaaaaa Mar 20, 2026
68a6e3c
docs: add databricks doris tpcds query time table
xylaaaaa Mar 20, 2026
8b88059
docs: organize tpcds result directories
xylaaaaa Mar 20, 2026
b3dc4a5
Add Hive full-flow benchmark helpers
xylaaaaa Mar 23, 2026
592bde9
Switch Hive3 execution engine to Tez
xylaaaaa Mar 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions docker/thirdparties/benchmark-hive-fullflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fail fast: abort on command errors, unset variables, and failed pipe stages.
set -euo pipefail

# Directory containing this script (docker/thirdparties).
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): ROOT/.. resolves to the docker/ directory, not the actual
# repository root (the sibling POC scripts climb four levels) — confirm the
# output location under docker/output/ is intended.
REPO_ROOT="$(cd "${ROOT}/.." && pwd)"
# Destination for per-case logs and summary.csv; overridable via BENCH_DIR.
BENCH_DIR="${BENCH_DIR:-${REPO_ROOT}/output/hive-fullflow-bench}"
mkdir -p "${BENCH_DIR}"

#######################################
# Run one benchmark case, capturing its output to a log file and emitting a
# single CSV row on stdout: <label>,<success|failure>,<seconds>,<log_file>.
# Globals:   BENCH_DIR (read) - directory for per-case log files
# Arguments: $1 - case label (also names the log file); $2.. - command to run
# Outputs:   CSV row on stdout; progress notes on stderr
# Returns:   0 when the command succeeds, 1 otherwise
#######################################
run_case() {
  local label="$1"
  shift
  local log_file="${BENCH_DIR}/${label}.log"
  local start_ts end_ts elapsed

  # Progress goes to stderr: stdout is reserved for the CSV row, which the
  # caller collects into summary.csv (the original INFO line polluted it).
  echo "INFO: running ${label}" >&2
  start_ts="$(date +%s)"
  if "$@" >"${log_file}" 2>&1; then
    end_ts="$(date +%s)"
    elapsed="$((end_ts - start_ts))"
    echo "${label},success,${elapsed},${log_file}"
    return 0
  fi

  end_ts="$(date +%s)"
  elapsed="$((end_ts - start_ts))"
  echo "${label},failure,${elapsed},${log_file}"
  return 1
}

#######################################
# Run both full-flow benchmark cases, record a CSV summary, and exit non-zero
# if any case failed.
# Globals:   BENCH_DIR, ROOT, REPO_ROOT (read)
# Outputs:   summary CSV to stdout and to ${BENCH_DIR}/summary.csv
#######################################
main() {
  local overall_rc=0
  local summary="${BENCH_DIR}/summary.csv"

  # Capture the group's output via redirection instead of "{ … } | tee":
  # the left side of a pipeline runs in a subshell, so "overall_rc=1" set
  # inside it was silently discarded and the script always exited 0.
  {
    echo "label,status,seconds,log_file"
    run_case \
      "current_hive3_fullflow" \
      env \
      POC_WORKDIR="${REPO_ROOT}/output/hive3-current-poc" \
      POC_TMPDIR="${REPO_ROOT}/output/hive3-current-poc/tmp" \
      HEALTH_TIMEOUT_SECONDS="${HEALTH_TIMEOUT_SECONDS:-180}" \
      bash "${ROOT}/docker-compose/hive/run-hive3-current-poc.sh" || overall_rc=1
    run_case \
      "tez_hive3_fullflow" \
      env \
      POC_WORKDIR="${REPO_ROOT}/output/hive3-tez-poc" \
      POC_TMPDIR="${REPO_ROOT}/output/hive3-tez-poc/tmp" \
      SERVICE_TIMEOUT_SECONDS="${SERVICE_TIMEOUT_SECONDS:-180}" \
      HEALTH_TIMEOUT_SECONDS="${HEALTH_TIMEOUT_SECONDS:-240}" \
      bash "${ROOT}/docker-compose/kerberos/run-hive3-tez-poc.sh" || overall_rc=1
  } > "${summary}"

  # Mirror the summary to stdout for interactive callers (tee-equivalent).
  cat "${summary}"

  exit "${overall_rc}"
}

# Entry point: run all benchmark cases; exits non-zero on any failure.
main "$@"
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ HIVE_SITE_CONF_hive_metastore_event_db_notification_api_auth=false
HIVE_SITE_CONF_hive_metastore_dml_events=true
HIVE_SITE_CONF_hive_metastore_transactional_event_listeners=org.apache.hive.hcatalog.listener.DbNotificationListener
HIVE_SITE_CONF_hive_stats_column_autogather=false
HIVE_SITE_CONF_hive_execution_engine=tez
HIVE_SITE_CONF_fs_s3_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
HIVE_SITE_CONF_fs_s3a_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
HIVE_SITE_CONF_fs_s3a_access_key=${AWSAk}
Expand Down Expand Up @@ -48,4 +49,3 @@ HIVE_SITE_CONF_fs_gs_auth_service_account_private_key_id=${GCSAccountPrivateKeyI
HIVE_SITE_CONF_fs_gs_auth_service_account_private_key=${GCSAccountPrivateKey}
HIVE_SITE_CONF_fs_gs_proxy_address=${GCSProxyAddress}
enablePaimonHms=${enablePaimonHms}

143 changes: 143 additions & 0 deletions docker/thirdparties/docker-compose/hive/run-hive3-current-poc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fail fast: abort on command errors, unset variables, and failed pipe stages.
set -euo pipefail

# Directory of this script (docker-compose/hive); repo root is four levels up.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT}/../../../.." && pwd)"
# Scratch tree the POC is rendered into; removed by cleanup unless
# POC_KEEP_UP=1.
WORKDIR="${POC_WORKDIR:-${REPO_ROOT}/output/hive3-current-poc}"
TMPDIR="${POC_TMPDIR:-${WORKDIR}/tmp}"
# Container-name prefix so concurrent POC runs do not collide.
POC_CONTAINER_UID="${POC_CONTAINER_UID:-hive3curpoc}"
POC_KEEP_UP="${POC_KEEP_UP:-0}"
# Host ports, each overridable via the corresponding POC_*_PORT variable.
# NOTE(review): FS_PORT presumably maps a filesystem/namenode endpoint —
# confirm against hive-3x.yaml.tpl.
HS_PORT="${POC_HS_PORT:-43000}"
HMS_PORT="${POC_HMS_PORT:-49383}"
PG_PORT="${POC_PG_PORT:-45732}"
FS_PORT="${POC_FS_PORT:-48320}"
LOAD_PARALLEL="${POC_LOAD_PARALLEL:-1}"
# Seconds to wait for HiveServer2 readiness before giving up.
HEALTH_TIMEOUT_SECONDS="${HEALTH_TIMEOUT_SECONDS:-180}"
IP_HOST="${POC_IP_HOST:-127.0.0.1}"

# Layout of the rendered compose project inside WORKDIR.
COMPOSE_DIR="${WORKDIR}/hive"
COMPOSE_FILE="${COMPOSE_DIR}/hive-3x.yaml"
ENV_FILE="${COMPOSE_DIR}/hadoop-hive-3x.env"
SERVER_CONTAINER="${POC_CONTAINER_UID}hive3-server"

#######################################
# EXIT-trap handler: tear down the compose project and delete the rendered
# work tree, unless the caller asked to keep the environment for inspection.
# Globals:   POC_KEEP_UP, WORKDIR, COMPOSE_FILE, ENV_FILE (read)
# Returns:   0 (teardown is best-effort)
#######################################
cleanup() {
  if [[ "${POC_KEEP_UP}" == "1" ]]; then
    echo "INFO: keep current Hive POC environment at ${WORKDIR}"
    return
  fi

  # Best-effort "down" before deleting the tree; ignore failures so the trap
  # never masks the script's real exit status.
  if [[ -f "${COMPOSE_FILE}" ]]; then
    docker compose -f "${COMPOSE_FILE}" --env-file "${ENV_FILE}" down >/dev/null 2>&1 || true
  fi
  # ${WORKDIR:?} aborts rather than expanding to "" (rm -rf of cwd/root),
  # and -- protects against option-like paths.
  rm -rf -- "${WORKDIR:?}"
}

# Print the path of a beeline binary inside the server container: prefer one
# on PATH, fall back to the stock /opt/hive install location. Exits non-zero
# (printing nothing) when neither is present, so callers can retry.
find_beeline() {
docker exec "${SERVER_CONTAINER}" bash -lc '
if command -v beeline >/dev/null 2>&1; then
command -v beeline
elif [[ -x /opt/hive/bin/beeline ]]; then
echo /opt/hive/bin/beeline
else
exit 1
fi
'
}

# Liveness probe: run "show databases" through beeline inside the container.
#   $1 - beeline binary path (as printed by find_beeline)
# NOTE(review): connects to 127.0.0.1:${HS_PORT} *inside* the container,
# i.e. assumes the in-container listen port equals the host mapping —
# confirm against hive-3x.yaml.tpl.
probe_beeline() {
local beeline_bin="$1"
docker exec "${SERVER_CONTAINER}" bash -lc "
set -euo pipefail
${beeline_bin} -u 'jdbc:hive2://127.0.0.1:${HS_PORT}/default' -n root --showHeader=false --outputformat=tsv2 -e 'show databases;'
"
}

#######################################
# Poll HiveServer2 until it answers a beeline probe or the timeout elapses.
# Globals:   HEALTH_TIMEOUT_SECONDS, HS_PORT, SERVER_CONTAINER (read)
# Outputs:   progress/diagnostics on stdout
# Returns:   0 when ready, 1 on timeout
#######################################
wait_until_ready() {
  local beeline_bin="" waited=0

  while (( waited < HEALTH_TIMEOUT_SECONDS )); do
    # Resolve the beeline path lazily inside the loop: right after
    # "compose up" the container may not accept "docker exec" yet, and the
    # original one-shot command substitution aborted the whole script under
    # set -e instead of retrying within the timeout.
    if [[ -z "${beeline_bin}" ]]; then
      beeline_bin="$(find_beeline 2>/dev/null)" || beeline_bin=""
    fi
    if [[ -n "${beeline_bin}" ]] && probe_beeline "${beeline_bin}" >/dev/null 2>&1; then
      echo "INFO: current HiveServer2 is ready on ${HS_PORT}"
      return 0
    fi
    # Progress note every 30s so CI logs show the wait is still alive.
    if (( waited % 30 == 0 )); then
      echo "INFO: waiting for current HiveServer2 to become ready"
    fi
    sleep 5
    waited=$((waited + 5))
  done

  echo "ERROR: timed out waiting for current HiveServer2"
  # Dump container status and recent logs to aid debugging; best-effort.
  docker ps -a --filter "name=${SERVER_CONTAINER}" --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}' || true
  docker logs --tail 200 "${SERVER_CONTAINER}" || true
  return 1
}

#######################################
# Build a throw-away copy of the hive compose directory under WORKDIR and
# render its *.tpl templates, so the checked-in templates are never touched.
# Globals:   WORKDIR, TMPDIR, ROOT, COMPOSE_DIR, POC_CONTAINER_UID,
#            LOAD_PARALLEL, IP_HOST, FS_PORT, HMS_PORT, HS_PORT, PG_PORT
# Outputs:   hadoop-hive-3x.env and hive-3x.yaml inside COMPOSE_DIR
#######################################
render_temp_compose() {
  # ${WORKDIR:?} guards the recursive delete against an empty/unset variable.
  rm -rf -- "${WORKDIR:?}"
  mkdir -p "${WORKDIR}" "${TMPDIR}"
  export TMPDIR
  cp -r "${ROOT}" "${WORKDIR}/"

  # Exported so envsubst can substitute them in the templates below.
  # NOTE: envsubst without an explicit variable list substitutes every
  # exported variable referenced by the template.
  export CONTAINER_UID="${POC_CONTAINER_UID}"
  export NEED_LOAD_DATA=0
  export LOAD_PARALLEL
  export IP_HOST
  export FS_PORT HMS_PORT HS_PORT PG_PORT

  pushd "${COMPOSE_DIR}" >/dev/null
  # The 3x env file is the concatenation of the shared template and the
  # 3x-specific template, in that order.
  envsubst < hadoop-hive.env.tpl > hadoop-hive-3x.env
  envsubst < hadoop-hive-3x.env.tpl >> hadoop-hive-3x.env
  envsubst < hive-3x.yaml.tpl > hive-3x.yaml
  popd >/dev/null
}

# End-to-end smoke test from inside the server container: drop/create an ORC
# table, insert three rows, and count them via beeline. Any failure
# propagates through the container shell's set -euo pipefail.
run_poc_queries() {
local beeline_bin
beeline_bin="$(find_beeline)"
docker exec "${SERVER_CONTAINER}" bash -lc "
set -euo pipefail
${beeline_bin} -u 'jdbc:hive2://127.0.0.1:${HS_PORT}/default' -n root --showHeader=false --outputformat=tsv2 -e \"
drop table if exists hive3_cur_poc_orc;
create table hive3_cur_poc_orc(id int) stored as orc;
insert into hive3_cur_poc_orc values (1),(2),(3);
select count(*) from hive3_cur_poc_orc;
\"
"
}

# Orchestrate the POC end to end: render the compose project, boot it, wait
# for HiveServer2, then run the smoke queries. The EXIT trap guarantees
# teardown on every path (or preservation when POC_KEEP_UP=1).
main() {
trap cleanup EXIT

echo "INFO: rendering temporary current Hive3 POC under ${WORKDIR}"
render_temp_compose

echo "INFO: starting current Hive3 compose"
# Best-effort "down" first to clear leftovers from a previous aborted run.
docker compose -f "${COMPOSE_FILE}" --env-file "${ENV_FILE}" down >/dev/null 2>&1 || true
docker compose -f "${COMPOSE_FILE}" --env-file "${ENV_FILE}" up -d --wait

wait_until_ready
run_poc_queries

echo "INFO: current Hive3 POC finished successfully"
}

# Entry point.
main "$@"
Loading
Loading